Documentation

Constants
const (
    Bool      = "Bool"
    Int8      = "Int8"
    Uint8     = "Uint8"
    Int16     = "Int16"
    Uint16    = "Uint16"
    Int32     = "Int32"
    Uint32    = "Uint32"
    Float32   = "Float32"
    SmallEnum = "SmallEnum"
    BigEnum   = "BigEnum"
    UUID      = "UUID"
    GeoPoint  = "GeoPoint"
    GeoShape  = "GeoShape"
    Int64     = "Int64"

    // array types
    ArrayBool      = "Bool[]"
    ArrayInt8      = "Int8[]"
    ArrayUint8     = "Uint8[]"
    ArrayInt16     = "Int16[]"
    ArrayUint16    = "Uint16[]"
    ArrayInt32     = "Int32[]"
    ArrayUint32    = "Uint32[]"
    ArrayFloat32   = "Float32[]"
    ArraySmallEnum = "SmallEnum[]"
    ArrayBigEnum   = "BigEnum[]"
    ArrayUUID      = "UUID[]"
    ArrayGeoPoint  = "GeoPoint[]"
    ArrayInt64     = "Int64[]"
)
String representations of data types.
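Since every array type is its element type's name plus a "[]" suffix, callers can distinguish array types with a plain suffix check. A minimal sketch; isArrayType is a hypothetical helper written as if inside this package, not something the package exports:

import "strings"

// isArrayType reports whether a data type string names an array type,
// e.g. isArrayType(ArrayInt64) is true and isArrayType(Int64) is false.
// Hypothetical helper for illustration only.
func isArrayType(dataType string) bool {
    return strings.HasSuffix(dataType, "[]")
}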
const (
    // EnumDelimiter is the delimiter used to separate enum cases.
    EnumDelimiter = "\u0000\n"
)
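A minimal sketch of round-tripping enum cases through a single delimited string, written as if inside this package; joining the cases with EnumDelimiter is an illustrative assumption, not a documented wire format:

import "strings"

// roundTripEnumCases joins enum cases with EnumDelimiter and splits them
// back out. Hypothetical helper for illustration only.
func roundTripEnumCases(cases []string) []string {
    payload := strings.Join(cases, EnumDelimiter)
    return strings.Split(payload, EnumDelimiter) // returns the original cases
}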
Variables
var (
    // ErrTableDoesNotExist indicates the table does not exist
    ErrTableDoesNotExist = errors.New("Table does not exist")
    // ErrTableAlreadyExist indicates the table already exists
    ErrTableAlreadyExist = errors.New("Table already exists")
    // ErrColumnDoesNotExist indicates the column does not exist
    ErrColumnDoesNotExist = errors.New("Column does not exist")
    // ErrColumnAlreadyExist indicates the column already exists
    ErrColumnAlreadyExist = errors.New("Column already exists")
    // ErrColumnAlreadyDeleted indicates the column is already deleted
    ErrColumnAlreadyDeleted = errors.New("Column already deleted")
    // ErrNotEnumColumn indicates the column is not an enum column
    ErrNotEnumColumn = errors.New("Column is not enum type")
    // ErrShardDoesNotExist indicates the shard does not exist
    ErrShardDoesNotExist = errors.New("Shard does not exist")
    // ErrNotFactTable indicates the table is not a fact table
    ErrNotFactTable = errors.New("Table is not fact table")
    // ErrNotDimensionTable indicates the table is not a dimension table
    ErrNotDimensionTable = errors.New("Table is not dimension table")
    // ErrWatcherAlreadyExist indicates a watcher is already registered
    ErrWatcherAlreadyExist = errors.New("Watcher already registered")
    // ErrDeleteTimeColumn indicates the time column cannot be deleted
    ErrDeleteTimeColumn = errors.New("Time column cannot be deleted")
    // ErrDeletePrimaryKeyColumn indicates a column that belongs to the primary key cannot be deleted
    ErrDeletePrimaryKeyColumn = errors.New("Primary key column cannot be deleted")
    // ErrChangePrimaryKeyColumn indicates primary key columns cannot be changed
    ErrChangePrimaryKeyColumn = errors.New("Primary key column cannot be changed")
    // ErrAllColumnsInvalid indicates all columns are invalid
    ErrAllColumnsInvalid = errors.New("All columns are invalid")
    // ErrMissingPrimaryKey indicates the schema does not specify a primary key
    ErrMissingPrimaryKey = errors.New("Primary key columns not specified")
    // ErrColumnNonExist indicates a referenced column does not exist
    ErrColumnNonExist = errors.New("Column does not exist")
    // ErrColumnDeleted indicates a referenced column was already deleted
    ErrColumnDeleted = errors.New("Column already deleted")
    // ErrInvalidDataType indicates an invalid data type
    ErrInvalidDataType = errors.New("Invalid data type")
    // ErrIllegalSchemaVersion indicates the new schema version is not greater than the old one
    ErrIllegalSchemaVersion = errors.New("New schema version not greater than old")
    // ErrSchemaUpdateNotAllowed indicates changes attempted on immutable fields
    ErrSchemaUpdateNotAllowed = errors.New("Illegal schema update on immutable field")
    // ErrInsufficientColumnCount indicates a schema with no columns
    ErrInsufficientColumnCount = errors.New("Insufficient column count")
    // ErrReusingColumnIDNotAllowed indicates an attempt to reuse the ID of a deleted column
    ErrReusingColumnIDNotAllowed = errors.New("Reusing column id not allowed")
    // ErrIllegalChangeSortColumn indicates illegal changes to sort columns
    ErrIllegalChangeSortColumn = errors.New("Illegal changes on sort columns")
    // ErrDuplicatedColumn indicates a column is used more than once in sort or primary key columns
    ErrDuplicatedColumn = errors.New("Illegal duplicated use of column")
    // ErrDuplicatedColumnName indicates a duplicated column name within the same table
    ErrDuplicatedColumnName = errors.New("Duplicated column name found")
    // ErrMissingTimeColumn indicates a fact table whose first column is not a time column
    ErrMissingTimeColumn = errors.New("Fact table has to have time column as first column")
    // ErrTimeColumnDoesNotAllowDefault indicates a default value set on a time column
    ErrTimeColumnDoesNotAllowDefault = errors.New("Time column does not allow default value")
    // ErrDisallowMissingEventTime indicates missing event time cannot be disallowed once allowed
    ErrDisallowMissingEventTime = errors.New("Can not disallow missing event time")
    // ErrTimeColumnDoesNotAllowHLLConfig indicates an HLL config set on a time column
    ErrTimeColumnDoesNotAllowHLLConfig = errors.New("HLLConfig not allowed for time column")
    // ErrHLLColumnDoesNotAllowDefaultValue indicates a default value set on an HLL column
    ErrHLLColumnDoesNotAllowDefaultValue = errors.New("hll column does not allow default value")
    // ErrInvalidTableBatchSize indicates a non-positive table batch size
    ErrInvalidTableBatchSize = errors.New("Table batch size should be larger than zero")
    // ErrInvalidPrimaryKeyBucketSize indicates a non-positive primary key bucket size
    ErrInvalidPrimaryKeyBucketSize = errors.New("Table primary key bucket size should be larger than zero")
)
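These are sentinel errors built with errors.New, so callers can compare against them directly, or via errors.Is when errors may be wrapped. A sketch, written as if inside this package; tableExists is a hypothetical helper and reader stands in for any TableSchemaReader implementation:

import "errors"

// tableExists maps the ErrTableDoesNotExist sentinel to a boolean result.
// Hypothetical helper for illustration only.
func tableExists(reader TableSchemaReader, name string) (bool, error) {
    _, err := reader.GetTable(name)
    if errors.Is(err, ErrTableDoesNotExist) {
        return false, nil
    }
    return err == nil, err
}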
Functions
This section is empty.
Types
type Column
type Column struct {
    // Immutable, columns cannot be renamed.
    Name string `json:"name"`
    // Immutable, columns cannot have their types changed.
    Type string `json:"type"`
    // Deleted columns are kept as placeholders in Table.Columns.
    // read only: true
    Deleted bool `json:"deleted,omitempty"`
    // We store the default value as a string here since it comes from user input.
    // Nil means the default value is NULL. The actual default value in the column's
    // data type should be stored in memstore.
    DefaultValue *string `json:"defaultValue,omitempty"`
    // Whether to compare characters case insensitively for enum columns. It only matters
    // for the ingestion client, as that is where enum strings are converted to enum values.
    CaseInsensitive bool `json:"caseInsensitive,omitempty"`
    // Whether to disable auto expansion of enum cases.
    DisableAutoExpand bool `json:"disableAutoExpand,omitempty"`
    // Mutable column configs.
    Config ColumnConfig `json:"config,omitempty"`
    // HLLConfig determines whether a column is enabled for HLL cardinality estimation.
    // HLLConfig is immutable.
    HLLConfig HLLConfig `json:"hllConfig,omitempty"`
}
Column defines the schema of a column from MetaStore. swagger:model column
func (*Column) IsEnumColumn

func (c *Column) IsEnumColumn() bool

IsEnumColumn checks whether a column is an enum column.
func (*Column) IsOverwriteOnlyDataType

func (c *Column) IsOverwriteOnlyDataType() bool

IsOverwriteOnlyDataType checks whether a column's data type is overwrite-only.
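For example, a small enum column might be declared as below, written as if inside this package; the field values are illustrative:

func exampleEnumColumn() Column {
    defaultCity := "unknown"
    return Column{
        Name:            "city",
        Type:            SmallEnum,    // one of the data type constants above
        DefaultValue:    &defaultCity, // stored as a string since it comes from user input
        CaseInsensitive: true,         // match enum strings case insensitively at ingestion
    }
}

exampleEnumColumn().IsEnumColumn() reports true here, since SmallEnum is an enum type.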
type ColumnConfig
type ColumnConfig struct {
    // ColumnEvictionConfig: for column level in-memory eviction, this is the
    // best effort TTL for in-memory data.
    // Column level eviction has nothing to do with data availability; based on
    // how much data we preloaded, the major impact will be on query
    // performance. Two priority configs are involved: preloading days and
    // priority.
    // - Preloading days is defined at each column level to indicate how many
    // recent days of data we want to preload into host memory. This is a best
    // effort operation.
    // - Priority is defined at each column level to indicate the priority of
    // each column. When data eviction happens, we rely on column priority
    // to decide which column will be evicted first.
    // A high number implies high priority.
    PreloadingDays int   `json:"preloadingDays,omitempty"`
    Priority       int64 `json:"priority,omitempty"`
}
ColumnConfig defines the schema of a column config that can be mutated by UpdateColumn API call. swagger:model columnConfig
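An illustrative config for a column that should keep about a week of recent data preloaded and be among the last evicted; the numbers are arbitrary examples, not defaults:

var exampleColumnConfig = ColumnConfig{
    PreloadingDays: 7,   // best-effort preload of the most recent 7 days
    Priority:       100, // higher number = higher priority, evicted later
}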
type HLLConfig (added in v0.0.2)
type HLLConfig struct {
    IsHLLColumn bool `json:"isHLLColumn,omitempty"`
}
HLLConfig defines the HLL configuration. swagger:model hllConfig
type MetaStore (added in v0.0.2)
type MetaStore interface {
    GetEnumDict(table, column string) ([]string, error)

    // Sets the watcher for the specified enum column.
    // Should only be called once for each enum column.
    // Returns an events channel that emits enum cases starting from startCase,
    // and a done channel for the consumer to ack once each event is processed.
    WatchEnumDictEvents(table, column string, startCase int) (events <-chan string, done chan<- struct{}, err error)

    // Returns the latest archiving/live cutoff for the specified shard.
    GetArchivingCutoff(table string, shard int) (uint32, error)

    // PurgeArchiveBatches deletes the metadata related to the archive batches.
    PurgeArchiveBatches(table string, shard, batchIDStart, batchIDEnd int) error

    // Returns the version and sequence number to use for the specified archive batch,
    // and the size of the batch, given the specified archiving/live cutoff.
    GetArchiveBatchVersion(table string, shard, batchID int, cutoff uint32) (uint32, uint32, int, error)

    // Returns the latest snapshot progress for the specified shard.
    // The return values are: redoLogFile, offset, lastReadBatchID, lastReadBatchOffset.
    GetSnapshotProgress(table string, shard int) (int64, uint32, int32, uint32, error)

    // Sets the watcher for table shard ownership change events.
    // Should only be called once.
    // Returns an events channel that emits desired ownership states,
    // and a done channel for the consumer to ack once each event is processed.
    WatchShardOwnershipEvents() (events <-chan ShardOwnership, done chan<- struct{}, err error)

    // Extends the enum dictionary for the specified enum column.
    // Returns the assigned case IDs for each case string.
    ExtendEnumDict(table, column string, enumCases []string) ([]int, error)

    // Lists available archive batches.
    GetArchiveBatches(table string, shard int, start, end int32) ([]int, error)

    // Adds a version and size for the specified archive batch.
    AddArchiveBatchVersion(table string, shard, batchID int, version uint32, seqNum uint32, batchSize int) error

    // Overwrites the version and size for the specified archive batch.
    OverwriteArchiveBatchVersion(table string, shard, batchID int, version uint32, seqNum uint32, batchSize int) error

    // Updates the archiving/live cutoff time for the specified shard. This is used
    // by the archiving job after each successful run.
    UpdateArchivingCutoff(table string, shard int, cutoff uint32) error

    // Updates the latest snapshot progress for the specified shard.
    UpdateSnapshotProgress(table string, shard int, redoLogFile int64, upsertBatchOffset uint32, lastReadBatchID int32, lastReadBatchOffset uint32) error

    // Updates the latest redo log file/offset that have been backfilled for the specified shard.
    UpdateBackfillProgress(table string, shard int, redoLogFile int64, offset uint32) error

    // Retrieves the latest redo log file/offset that have been backfilled for the specified shard.
    GetBackfillProgressInfo(table string, shard int) (int64, uint32, error)

    // Updates the ingestion commit offset, used for Kafka-like streaming ingestion.
    UpdateRedoLogCommitOffset(table string, shard int, offset int64) error

    // Gets the ingestion commit offset, used for Kafka-like streaming ingestion.
    GetRedoLogCommitOffset(table string, shard int) (int64, error)

    // Updates the ingestion checkpoint offset, used for Kafka-like streaming ingestion.
    UpdateRedoLogCheckpointOffset(table string, shard int, offset int64) error

    // Gets the ingestion checkpoint offset, used for Kafka-like streaming ingestion.
    GetRedoLogCheckpointOffset(table string, shard int) (int64, error)

    TableSchemaWatchable
    TableSchemaMutator
}
MetaStore defines interfaces of the external metastore, which can be implemented using the file system, SQLite, ZooKeeper, etc.
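All the watch methods share the same events/done pairing: the consumer ranges over the events channel and acks each processed event on the done channel. A sketch of consuming shard ownership events, written as if inside this package; store stands in for any MetaStore implementation and the handling is application-specific:

func consumeOwnership(store MetaStore) error {
    events, done, err := store.WatchShardOwnershipEvents()
    if err != nil {
        return err
    }
    go func() {
        for ownership := range events {
            // Apply the desired ownership state here (application-specific).
            _ = ownership
            // Ack so the metastore can deliver the next event.
            done <- struct{}{}
        }
    }()
    return nil
}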
type ShardOwnership
ShardOwnership defines an instruction on whether the receiving instance should start to own or disown the specified table shard.
type Table
type Table struct {
    // Name of the table, immutable.
    Name string `json:"name"`
    // Index to Columns also serves as column IDs.
    Columns []Column `json:"columns"`
    // IDs of primary key columns. This field is immutable.
    PrimaryKeyColumns []int `json:"primaryKeyColumns"`
    // Whether this is a fact table.
    IsFactTable bool `json:"isFactTable"`
    // Table configurations.
    Config TableConfig `json:"config"`
    // Fact table only.
    // IDs of columns to sort based upon.
    ArchivingSortColumns []int `json:"archivingSortColumns,omitempty"`
    // Incarnation gets incremented every time a table name is reused.
    // Only used for controller managed schema in a cluster setting.
    Incarnation int `json:"incarnation"`
    // Version gets incremented every time the schema is updated.
    // Only used for controller managed schema in a cluster setting.
    Version int `json:"version"`
}
Table defines the schema and configurations of a table from MetaStore. swagger:model table
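A sketch of a fact table definition under the conventions above: the time column comes first (as ErrMissingTimeColumn requires), and primary key and sort columns are expressed as indexes into Columns. All names and values are illustrative:

var exampleTrips = Table{
    Name:        "trips",
    IsFactTable: true,
    Columns: []Column{
        {Name: "request_at", Type: Uint32}, // time column first, as fact tables require
        {Name: "trip_uuid", Type: UUID},
        {Name: "city_id", Type: Uint16},
        {Name: "fare", Type: Float32},
    },
    PrimaryKeyColumns:    []int{1},    // trip_uuid, by index into Columns
    ArchivingSortColumns: []int{2, 0}, // fact table only: sort by city_id, then time
}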
type TableConfig
type TableConfig struct {
    // Initial setting of the number of buckets for the primary key.
    // If it equals 0, the default will be used.
    InitialPrimaryKeyNumBuckets int `json:"initPrimaryKeyNumBuckets,omitempty"`
    // Size of each live batch, should be sufficiently large.
    BatchSize int `json:"batchSize,omitempty" validate:"min=1"`
    // Specifies how often to create a new redo log file.
    RedoLogRotationInterval int `json:"redoLogRotationInterval,omitempty" validate:"min=1"`
    // Specifies the size limit of a single redo log file.
    MaxRedoLogFileSize int `json:"maxRedoLogFileSize,omitempty" validate:"min=1"`
    // Number of minutes after event time before a record can be archived.
    ArchivingDelayMinutes uint32 `json:"archivingDelayMinutes,omitempty" validate:"min=1"`
    // Specifies how often archiving runs.
    ArchivingIntervalMinutes uint32 `json:"archivingIntervalMinutes,omitempty" validate:"min=1"`
    // Specifies how often backfill runs.
    BackfillIntervalMinutes uint32 `json:"backfillIntervalMinutes,omitempty" validate:"min=1"`
    // Upper limit of current backfill buffer size + backfilling buffer size.
    BackfillMaxBufferSize int64 `json:"backfillMaxBufferSize,omitempty" validate:"min=1"`
    // Backfill buffer size in bytes that will trigger a backfill job.
    BackfillThresholdInBytes int64 `json:"backfillThresholdInBytes,omitempty" validate:"min=1"`
    // Size of each live batch used by the backfill job.
    BackfillStoreBatchSize int `json:"backfillStoreBatchSize,omitempty" validate:"min=1"`
    // Records with a timestamp older than now - RecordRetentionInDays will be skipped
    // during ingestion and backfill. 0 means unlimited days.
    RecordRetentionInDays int `json:"recordRetentionInDays,omitempty" validate:"min=0"`
    // Number of mutations to accumulate before creating a new snapshot.
    SnapshotThreshold int `json:"snapshotThreshold,omitempty" validate:"min=1"`
    // Specifies how often snapshot runs.
    SnapshotIntervalMinutes int `json:"snapshotIntervalMinutes,omitempty" validate:"min=1"`
    // Whether records with a missing event time are allowed.
    AllowMissingEventTime bool `json:"allowMissingEventTime,omitempty"`
}
TableConfig defines the table configurations that can be changed. swagger:model tableConfig
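An illustrative configuration; the values are examples, not defaults shipped by the package:

var exampleTableConfig = TableConfig{
    BatchSize:                32768, // live batch size, must be positive
    ArchivingDelayMinutes:    1440,  // wait a day past event time before archiving
    ArchivingIntervalMinutes: 180,   // run archiving every three hours
    BackfillIntervalMinutes:  60,    // run backfill hourly
    RecordRetentionInDays:    90,    // skip records older than 90 days; 0 means unlimited
}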
type TableSchemaMutator (added in v0.0.2)
type TableSchemaMutator interface {
    TableSchemaReader

    CreateTable(table *Table) error
    DeleteTable(name string) error
    UpdateTableConfig(table string, config TableConfig) error
    UpdateTable(table Table) error

    // A newly added column can be appended to the end of ArchivingSortColumns
    // by passing appendToArchivingSortOrder as true.
    AddColumn(table string, column Column, appendToArchivingSortOrder bool) error
    // Updates the column config.
    UpdateColumn(table string, column string, config ColumnConfig) error
    DeleteColumn(table string, column string) error
}
TableSchemaMutator mutates table metadata
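A sketch of a create-then-evolve flow against this interface, written as if inside this package; bootstrapSchema is a hypothetical helper, and the table value and added column are placeholders:

func bootstrapSchema(m TableSchemaMutator, t *Table) error {
    // Create the table, tolerating a previous run having created it already.
    if err := m.CreateTable(t); err != nil && err != ErrTableAlreadyExist {
        return err
    }
    // Add a column later; true also appends it to the archiving sort order.
    return m.AddColumn(t.Name, Column{Name: "driver_id", Type: Int64}, true)
}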
type TableSchemaReader (added in v0.0.2)
type TableSchemaReader interface {
    ListTables() ([]string, error)
    GetTable(name string) (*Table, error)
}
TableSchemaReader reads table schema
type TableSchemaWatchable (added in v0.0.2)
type TableSchemaWatchable interface {
    // Sets the watcher for table list change (table deletion) events.
    // Should only be called once.
    // Returns an events channel that emits the entire table list on each table deletion event,
    // and a done channel for the consumer to ack once each event is processed.
    WatchTableListEvents() (events <-chan []string, done chan<- struct{}, err error)

    // Sets the watcher for table modification/addition events.
    // Should only be called once.
    // Returns an events channel that emits the table schema on each change event for the given table,
    // and a done channel for the consumer to ack once each event is processed.
    WatchTableSchemaEvents() (events <-chan *Table, done chan<- struct{}, err error)
}
TableSchemaWatchable watches table schema update events
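The same events/done contract as the other watchers applies. A short sketch, written as if inside this package, that tracks deletions via the table list channel; the handling is illustrative:

func watchDeletions(w TableSchemaWatchable) error {
    events, done, err := w.WatchTableListEvents()
    if err != nil {
        return err
    }
    go func() {
        for tables := range events {
            // Each event carries the full remaining table list after a deletion.
            _ = tables
            done <- struct{}{} // ack the event once processed
        }
    }()
    return nil
}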