Documentation
¶
Index ¶
- Constants
- func DefaultCommonFieldDescriptions() map[string]string
- func IsCommonField(name string) bool
- func IsValidColumnName(name string) bool
- func IsValidColumnType(columnType string) bool
- type ColumnSchema
- type ColumnType
- type CommonFields
- type GetColumnDescriptions
- type GetDescription
- type Mode
- type ParquetTag
- type RowSchema
- type SchemaBuilder
- type SchemaMap
- type SourceEnrichment
Constants ¶
const DefaultIndex = "default"
Variables ¶
This section is empty.
Functions ¶
func DefaultCommonFieldDescriptions ¶
TODO improve these descriptions https://github.com/turbot/tailpipe-plugin-sdk/issues/83
func IsCommonField ¶
func IsValidColumnName ¶
IsValidColumnName checks if a column name is valid in DuckDB.
func IsValidColumnType ¶
IsValidColumnType checks if a column type is valid in DuckDB.
Types ¶
type ColumnSchema ¶
type ColumnSchema struct {
// SourceName refers to one of 2 things depending on where the schema is used
// 1. When the schema is used by a mapper, SourceName refers to the field name in the raw row data
// 2. When the schema is used by the JSONL conversion, SourceName refers to the column name in the JSONL
SourceName string `json:"-"`
ColumnName string `json:"name,omitempty"`
// DuckDB type for the column
Type string `json:"type"`
// for struct and struct[]
StructFields []*ColumnSchema `json:"struct_fields,omitempty"`
// the column description (optional)
Description string `json:"description,omitempty"`
}
func ColumnFromProto ¶
func ColumnFromProto(p *proto.ColumnSchema) *ColumnSchema
ColumnFromProto creates a new ColumnSchema from proto
func (*ColumnSchema) FullType ¶
func (c *ColumnSchema) FullType() string
type ColumnType ¶
type ColumnType struct {
// DuckDB type
Type string
// for structs/maps/struct[]
ChildFields []*ColumnSchema
}
type CommonFields ¶
type CommonFields struct {
// Mandatory fields
TpID string `json:"tp_id"`
TpSourceType string `json:"tp_source_type"`
TpIngestTimestamp time.Time `json:"tp_ingest_timestamp"`
TpTimestamp time.Time `json:"tp_timestamp"`
// Hive fields
TpTable string `json:"tp_table"`
TpPartition string `json:"tp_partition"`
TpIndex string `json:"tp_index"`
TpDate time.Time `json:"tp_date" parquet:"type=DATE"`
// Optional fields
TpSourceIP *string `json:"tp_source_ip"`
TpDestinationIP *string `json:"tp_destination_ip"`
TpSourceName *string `json:"tp_source_name"`
TpSourceLocation *string `json:"tp_source_location"`
// Searchable
TpAkas []string `json:"tp_akas,omitempty"`
TpIps []string `json:"tp_ips,omitempty"`
TpTags []string `json:"tp_tags,omitempty"`
TpDomains []string `json:"tp_domains,omitempty"`
TpEmails []string `json:"tp_emails,omitempty"`
TpUsernames []string `json:"tp_usernames,omitempty"`
}
CommonFields represents the common fields with JSON tags
func (*CommonFields) AsMap ¶
func (c *CommonFields) AsMap() map[string]string
AsMap converts the CommonFields struct into a map[string]string.
func (*CommonFields) GetCommonFields ¶
func (c *CommonFields) GetCommonFields() CommonFields
GetCommonFields implements RowStruct
func (*CommonFields) InitialiseFromMap ¶
func (c *CommonFields) InitialiseFromMap(source map[string]string)
InitialiseFromMap initializes a CommonFields struct using a source map
func (*CommonFields) Validate ¶
func (c *CommonFields) Validate() error
Validate implements the Validatable interface and is used to validate that the required fields have been set. It can also be overridden by RowStruct implementations to perform additional validation - in this case CommonFields.Validate() should be called first.
type GetColumnDescriptions ¶
GetColumnDescriptions is an interface that can be implemented by a row struct to provide descriptions for each column
type GetDescription ¶
type GetDescription interface {
GetDescription() string
}
GetDescription is an interface that can be implemented by any struct that has a description. It is used by tables to specify the description of the table.
type Mode ¶
type Mode string
Mode values are set on the schema config which is provided in a dynamic table config
const ( // ModeFull means that the schema is fully defined (the default) ModeFull Mode = "full" // ModePartial means that the schema is dynamic and is partially defined ModePartial Mode = "partial" // ModeDynamic means that the schema is fully dynamic and will be determined at runtime // NOTE: we will never explicitly specify this mode - as it means there is no defined schema ModeDynamic Mode = "dynamic" )
type ParquetTag ¶
ParquetTag represents the components of a parquet tag
func ParseParquetTag ¶
func ParseParquetTag(tag string) (*ParquetTag, error)
ParseParquetTag parses and validates a parquet tag string
type RowSchema ¶
type RowSchema struct {
Columns []*ColumnSchema `json:"columns"`
// should we include ALL source fields in addition to any defined columns, or ONLY include the columns defined
AutoMapSourceFields bool `json:"automap_source_fields"`
// should we exclude any source fields from the output (only applicable if automap_source_fields is true)
ExcludeSourceFields []string `json:"exclude_source_fields"`
// the table description (optional)
Description string `json:"description,omitempty"`
}
func RowSchemaFromProto ¶
func SchemaFromStruct ¶
func (*RowSchema) AsMap ¶
func (r *RowSchema) AsMap() map[string]*ColumnSchema
func (*RowSchema) InitialiseFromInferredSchema ¶
InitialiseFromInferredSchema populates this schema using an inferred row schema. This is called from the CLI when we are trying to determine the full schema after receiving the first JSONL file. It either adds all fields in the inferred schema (if AutoMapSourceFields is true) or just populates missing types (if AutoMapSourceFields is false).
type SchemaBuilder ¶
type SchemaBuilder struct {
// contains filtered or unexported fields
}
func NewSchemaBuilder ¶
func NewSchemaBuilder() *SchemaBuilder
func (*SchemaBuilder) SchemaFromStruct ¶
func (b *SchemaBuilder) SchemaFromStruct(s any) (*RowSchema, error)
type SourceEnrichment ¶
type SourceEnrichment struct {
// a map of metadata values the source has extracted - perhaps by parsing the artifact path with a grok pattern
Metadata map[string]string
// CommonFields - a set of common fields that are added to every row
CommonFields CommonFields
}
SourceEnrichment is a set of metadata about a row. It is built by the row source and passed to the enrichment.
func NewSourceEnrichment ¶
func NewSourceEnrichment(metadata map[string]string) *SourceEnrichment
func SourceEnrichmentFromProto ¶
func SourceEnrichmentFromProto(p *proto.SourceEnrichment) *SourceEnrichment
func (*SourceEnrichment) ToProto ¶
func (s *SourceEnrichment) ToProto() *proto.SourceEnrichment