types

package
v1.1.0-beta.0...-bedae51 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 6, 2026 License: Apache-2.0 Imports: 15 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type AnalysisJobJSON

// AnalysisJobJSON represents the JSON format of an AnalysisJob.
//
// Every field carries an explicit `json` tag, so the serialized keys are the
// snake_case names from the tags, not the Go field names. Indicators is nested
// as an IndicatorsJSON object under the "indicators" key.
//
// No tag uses `omitempty`, so every key is always present in the output.
// NOTE(review): if this is marshaled with encoding/json, nil slices and maps
// are written as `null` and the int64 keys of PartitionIndexIDs are written
// as JSON strings — consumers should be prepared for both.
// NOTE(review): PartitionIndexIDs presumably maps a partition ID to the index
// IDs to analyze on that partition — confirm against the code that fills it.
type AnalysisJobJSON struct {
	Type               string            `json:"type"`
	TableID            int64             `json:"table_id"`
	Weight             float64           `json:"weight"`
	PartitionIDs       []int64           `json:"partition_ids"`
	IndexIDs           []int64           `json:"index_ids"`
	PartitionIndexIDs  map[int64][]int64 `json:"partition_index_ids"`
	Indicators         IndicatorsJSON    `json:"indicators"`
	HasNewlyAddedIndex bool              `json:"has_newly_added_index"`
}

AnalysisJobJSON represents the JSON format of an AnalysisJob.

type CacheUpdate

// CacheUpdate encapsulates changes to be made to the stats cache.
// It is the argument of StatsCache.UpdateStatsCache, which applies the whole
// batch to the cache.
//
// NOTE(review): Updated presumably holds the table stats to write into the
// cache and Deleted the IDs of the entries to remove — confirm with the
// cache implementation. Options tunes how the batch is applied (see
// UpdateOptions).
type CacheUpdate struct {
	Updated []*statistics.Table
	Deleted []int64
	Options UpdateOptions
}

CacheUpdate encapsulates changes to be made to the stats cache.

type ColStatsTimeInfo

// ColStatsTimeInfo records usage information of this column stats.
// It is the value type of the map returned by StatsUsage.LoadColumnStatsUsage.
//
// Both fields are pointers, so either may be nil.
// NOTE(review): nil presumably means "never used" / "never analyzed" — confirm.
type ColStatsTimeInfo struct {
	LastUsedAt     *types.Time // last time the column was used
	LastAnalyzedAt *types.Time // last time the column was analyzed
}

ColStatsTimeInfo records usage information of this column stats.

type DDL

// DDL is used to handle DDL events.
// It exposes a handler for a single schema change event and the channel on
// which such events are delivered to the handle.
type DDL interface {
	// HandleDDLEvent handles a DDL event described by changeEvent.
	// It returns a non-nil error when the event could not be handled.
	HandleDDLEvent(ctx context.Context, sctx sessionctx.Context, changeEvent *notifier.SchemaChangeEvent) error
	// DDLEventCh returns the DDL events channel in the handle.
	DDLEventCh() chan *notifier.SchemaChangeEvent
}

DDL is used to handle ddl events.

type GlobalStatsInfo

// GlobalStatsInfo represents the contextual information pertaining to global statistics.
// It is passed to StatsGlobal.MergePartitionStats2GlobalStatsByTableID.
type GlobalStatsInfo struct {
	HistIDs []int64
	// The meaning of HistIDs (declared above) depends on IsIndex (declared below):
	// when `IsIndex == 0`, HistIDs will be the column IDs.
	// Otherwise, HistIDs will only contain the index ID.
	IsIndex      int
	StatsVersion int
}

GlobalStatsInfo represents the contextual information pertaining to global statistics.

type IndexUsage

// IndexUsage is an interface to define the function of collecting index usage stats.
// It covers the per-session collectors, the background collector worker's
// lifecycle (StartWorker / Close), garbage collection of usage data, and
// lookup of the usage recorded for one index.
type IndexUsage interface {
	// NewSessionIndexUsageCollector creates a new Collector for a session.
	NewSessionIndexUsageCollector() *indexusage.SessionIndexUsageCollector

	// GCIndexUsage removes unnecessary index usage data.
	GCIndexUsage() error

	// StartWorker starts the collector worker.
	StartWorker()

	// Close closes the index usage collector worker and waits for it to stop.
	Close()

	// GetIndexUsage returns the index usage information of the index
	// identified by tableID and indexID.
	GetIndexUsage(tableID int64, indexID int64) indexusage.Sample
}

IndexUsage is an interface to define the function of collecting index usage stats.

type IndicatorsJSON

// IndicatorsJSON represents the JSON format of Indicators.
// It is embedded in AnalysisJobJSON under the "indicators" key.
//
// All three indicators are carried as strings, serialized under the
// snake_case keys given in the `json` tags.
// NOTE(review): the values are presumably pre-formatted numbers/durations;
// the exact string format is not visible here — confirm before parsing them.
type IndicatorsJSON struct {
	ChangePercentage     string `json:"change_percentage"`
	TableSize            string `json:"table_size"`
	LastAnalysisDuration string `json:"last_analysis_duration"`
}

IndicatorsJSON represents the JSON format of Indicators.

type MetaUpdate

// MetaUpdate records a meta update for a partition or table.
// One or more MetaUpdate values are passed to StatsReadWriter.SaveMetaToStorage.
//
// PhysicalID identifies the table or partition the update applies to.
// NOTE(review): Count and ModifyCount presumably correspond to the count and
// modify_count columns of mysql.stats_meta — confirm with the storage code.
type MetaUpdate struct {
	PhysicalID  int64
	Count       int64
	ModifyCount int64
}

MetaUpdate records a meta update for a partition or table.

type NeededItemTask

// NeededItemTask represents one needed column or index together with its
// expiration time. Tasks of this type are handed to
// StatsSyncLoad.AppendNeededItem and flow through StatsSyncLoad.HandleOneTask.
//
// Item is the column/index whose stats are needed and ResultCh carries the
// stmtctx.StatsLoadResult for it.
// NOTE(review): ToTimeout is presumably the instant at which the task
// expires, and Retry presumably counts load attempts so far — confirm with
// the sync-load implementation.
type NeededItemTask struct {
	ToTimeout time.Time
	ResultCh  chan stmtctx.StatsLoadResult
	Item      model.StatsLoadItem
	Retry     int
}

NeededItemTask represents one needed column or index together with its expiration time.

type PartitionStatisticLoadTask

// PartitionStatisticLoadTask currently records a partition-level jsontable.
// It pairs a JSONTable with the physical ID it belongs to; tasks are consumed
// from the `taskCh` channel of StatsReadWriter.LoadStatsFromJSONConcurrently.
type PartitionStatisticLoadTask struct {
	JSONTable  *statsutil.JSONTable
	PhysicalID int64
}

PartitionStatisticLoadTask currently records a partition-level jsontable.

type PersistFunc

// PersistFunc is used to persist a JSONTable at the partition level.
// It receives the JSONTable together with the physical ID it belongs to and
// returns an error if persisting fails.
//
// It is the callback type taken by StatsReadWriter.PersistStatsBySnapshot,
// which documents that it might invoke the callback with a nil jsontable, so
// implementations must tolerate jsonTable == nil.
type PersistFunc func(ctx context.Context, jsonTable *statsutil.JSONTable, physicalID int64) error

PersistFunc is used to persist a JSONTable at the partition level.

type PriorityQueueSnapshot

// PriorityQueueSnapshot is the snapshot of the stats priority queue.
// It is returned by StatsAnalyze.GetPriorityQueueSnapshot.
//
// CurrentJobs lists the jobs in their JSON form (serialized as
// "current_jobs"); MustRetryTables is serialized as "must_retry_tables".
// NOTE(review): MustRetryTables presumably holds IDs of tables whose analysis
// has to be retried — confirm with the priority queue implementation.
type PriorityQueueSnapshot struct {
	CurrentJobs     []AnalysisJobJSON `json:"current_jobs"`
	MustRetryTables []int64           `json:"must_retry_tables"`
}

PriorityQueueSnapshot is the snapshot of the stats priority queue.

type StatsAnalyze

// StatsAnalyze is used to handle auto-analyze and manage analyze jobs.
//
// The job-bookkeeping methods StartAnalyzeJob, UpdateAnalyzeJobProgress and
// FinishAnalyzeJob are best-effort: they return nothing and only log on
// failure, so a bookkeeping problem never fails the analysis itself.
type StatsAnalyze interface {
	// InsertAnalyzeJob inserts the analyze job into mysql.analyze_jobs and gets the job ID for further updates of the job.
	InsertAnalyzeJob(job *statistics.AnalyzeJob, instance string, procID uint64) error

	// StartAnalyzeJob updates the job status to `running` and sets the start time.
	// There is no guarantee that the job record will actually be updated. If the job fails to start, an error will be logged.
	// It is OK because this won't affect the analysis job's success.
	StartAnalyzeJob(job *statistics.AnalyzeJob)

	// UpdateAnalyzeJobProgress updates the current progress of the analyze job.
	// There is no guarantee that the job record will actually be updated. If the job fails to update, an error will be logged.
	// It is OK because this won't affect the analysis job's success.
	UpdateAnalyzeJobProgress(job *statistics.AnalyzeJob, rowCount int64)

	// FinishAnalyzeJob updates the job status to `finished`, sets the end time, and updates the job info.
	// There is no guarantee that the job record will actually be updated. If the job fails to finish, an error will be logged.
	// It is OK because this won't affect the analysis job's success.
	FinishAnalyzeJob(job *statistics.AnalyzeJob, failReason error, analyzeType statistics.JobType)

	// DeleteAnalyzeJobs deletes the analyze jobs whose update time is earlier than updateTime.
	DeleteAnalyzeJobs(updateTime time.Time) error

	// CleanupCorruptedAnalyzeJobsOnCurrentInstance cleans up the corrupted analyze jobs.
	// A corrupted analyze job is one that is in a 'pending' or 'running' state,
	// but is associated with a TiDB instance that is either not currently running or has been restarted.
	// We use the currently running analyze jobs (currentRunningProcessIDs) to check whether an analyze job is corrupted.
	CleanupCorruptedAnalyzeJobsOnCurrentInstance(currentRunningProcessIDs map[uint64]struct{}) error

	// CleanupCorruptedAnalyzeJobsOnDeadInstances purges analyze jobs that are associated with non-existent instances.
	// This function is specifically designed to handle jobs that have become corrupted due to
	// their associated instances being removed from the current cluster.
	CleanupCorruptedAnalyzeJobsOnDeadInstances() error

	// HandleAutoAnalyze analyzes the outdated tables. (The change percent of the table exceeds the threshold)
	// It also analyzes newly created tables and newly added indexes.
	// NOTE(review): the result presumably reports whether any table was analyzed — confirm.
	HandleAutoAnalyze() (analyzed bool)

	// CheckAnalyzeVersion checks whether all the statistics versions of this table's columns and indexes are the same.
	// NOTE(review): version is passed by pointer, presumably as an output for the detected version — confirm.
	CheckAnalyzeVersion(tblInfo *model.TableInfo, physicalIDs []int64, version *int) bool

	// GetPriorityQueueSnapshot returns a snapshot of the stats priority queue.
	GetPriorityQueueSnapshot() (PriorityQueueSnapshot, error)

	// ClosePriorityQueue closes the stats priority queue if initialized.
	// NOTE: This does NOT stop the analyze worker. Only the priority queue is closed.
	ClosePriorityQueue()

	// Close closes the analyze worker.
	Close()
}

StatsAnalyze is used to handle auto-analyze and manage analyze jobs. We need to read all the tables' last_analyze_time, modified_count, and row_count into memory, because the current auto-analyze scheduling needs the whole information.

type StatsCache

// StatsCache is used to manage all table statistics in memory.
// It offers per-table access (Get / Put), batch updates (UpdateStatsCache),
// refresh from the store (Update), version tracking, and capacity/eviction
// control.
type StatsCache interface {
	// Close closes this cache.
	Close()

	// Clear clears this cache.
	Clear()

	// Update reads stats meta from store and updates the stats map.
	// To work with auto-analyze's needs, we'll update all tables' stats meta into memory.
	Update(ctx context.Context, is infoschema.InfoSchema, tableAndPartitionIDs ...int64) error

	// MemConsumed returns its memory usage.
	MemConsumed() (size int64)

	// Get returns the specified table's stats.
	// NOTE(review): the bool presumably reports whether the table was found in the cache — confirm.
	Get(tableID int64) (*statistics.Table, bool)

	// Put puts this table stats into the cache.
	Put(tableID int64, t *statistics.Table)

	// UpdateStatsCache applies a batch of changes to the cache.
	UpdateStatsCache(update CacheUpdate)

	// GetNextCheckVersionWithOffset returns the last version with offset.
	// It is used to fetch updated statistics from the stats meta table.
	GetNextCheckVersionWithOffset() uint64

	// MaxTableStatsVersion returns the version of the current cache, which is defined as
	// the max table stats version the cache has in its lifecycle.
	MaxTableStatsVersion() uint64

	// Values returns all values in this cache.
	Values() []*statistics.Table

	// Len returns the length of this cache.
	Len() int

	// SetStatsCacheCapacity sets the cache's capacity, given in bytes.
	SetStatsCacheCapacity(capBytes int64)

	// Replace replaces this cache with the given one.
	Replace(cache StatsCache)

	// UpdateStatsHealthyMetrics updates stats healthy distribution metrics according to stats cache.
	UpdateStatsHealthyMetrics()

	// TriggerEvict triggers the cache to evict some items.
	TriggerEvict()
}

StatsCache is used to manage all table statistics in memory.

type StatsGC

// StatsGC is used to GC unnecessary stats.
type StatsGC interface {
	// GCStats will garbage collect the useless stats' info.
	// For dropped tables, we will first update their version
	// so that other TiDB instances can know that the table is deleted.
	GCStats(is infoschema.InfoSchema, ddlLease time.Duration) (err error)

	// ClearOutdatedHistoryStats clears outdated historical stats.
	// Only for test.
	ClearOutdatedHistoryStats() error

	// DeleteTableStatsFromKV deletes table statistics from kv.
	// A statsID refers to the statistics of a table or a partition.
	// NOTE(review): `soft` presumably selects a soft (mark-only) deletion — confirm with the implementation.
	DeleteTableStatsFromKV(statsIDs []int64, soft bool) (err error)
}

StatsGC is used to GC unnecessary stats.

type StatsGlobal

// StatsGlobal is used to manage partition table global stats.
type StatsGlobal interface {
	// MergePartitionStats2GlobalStatsByTableID merges partition stats to global stats by table ID.
	// `info` tells which histograms to merge (see GlobalStatsInfo) and `opts`
	// carries the analyze options keyed by option type.
	// NOTE(review): physicalID is presumably the ID of the partitioned table
	// whose global stats are produced — confirm with the caller.
	MergePartitionStats2GlobalStatsByTableID(sc sessionctx.Context,
		opts map[ast.AnalyzeOptionType]uint64, is infoschema.InfoSchema,
		info *GlobalStatsInfo,
		physicalID int64,
	) (err error)
}

StatsGlobal is used to manage partition table global stats.

type StatsHandle

// StatsHandle is used to manage TiDB Statistics.
//
// It is a composite interface: it embeds the handleutil helper interfaces and
// the Stats* interfaces of this package (plus DDL), and adds two lookups for
// physical table statistics.
//
// Close() is declared with the same signature by StatsAnalyze, StatsCache and
// IndexUsage (the latter reached through StatsUsage). The embedded
// declarations merge into a single Close method here (overlapping method sets
// are allowed since Go 1.14), so one implementation method has to honor all
// three descriptions.
type StatsHandle interface {
	// Pool is used to get a session or a goroutine to execute stats updating.
	handleutil.Pool

	// AutoAnalyzeProcIDGenerator is used to generate auto analyze proc ID.
	handleutil.AutoAnalyzeProcIDGenerator

	// LeaseGetter is used to get stats lease.
	handleutil.LeaseGetter

	// TableInfoGetter is used to get table meta info.
	handleutil.TableInfoGetter

	// GetPhysicalTableStats retrieves the statistics for a physical table from cache or creates a pseudo statistics table.
	// physicalTableID can be a table ID or partition ID.
	GetPhysicalTableStats(physicalTableID int64, tblInfo *model.TableInfo) *statistics.Table

	// GetNonPseudoPhysicalTableStats retrieves the statistics for a physical table from cache, but it will not return pseudo.
	// physicalTableID can be a table ID or partition ID.
	// Note: this function may return nil if the table is not found in the cache.
	// NOTE(review): the bool presumably reports whether the table was found — confirm.
	GetNonPseudoPhysicalTableStats(physicalTableID int64) (*statistics.Table, bool)

	// StatsGC is used to do the GC job.
	StatsGC

	// StatsUsage is used to handle table delta and stats usage.
	StatsUsage

	// StatsHistory is used to manage historical stats.
	StatsHistory

	// StatsAnalyze is used to handle auto-analyze and manage analyze jobs.
	StatsAnalyze

	// StatsCache is used to manage all table statistics in memory.
	StatsCache

	// StatsLock is used to manage locked stats.
	StatsLock

	// StatsReadWriter is used to read and write stats to the storage.
	StatsReadWriter

	// StatsGlobal is used to manage partition table global stats.
	StatsGlobal

	// DDL is used to handle ddl events.
	DDL
}

StatsHandle is used to manage TiDB Statistics.

type StatsHistory

// StatsHistory is used to manage historical stats.
type StatsHistory interface {
	// RecordHistoricalStatsMeta records the historical stats meta in mysql.stats_meta_history one by one.
	RecordHistoricalStatsMeta(version uint64, source string, enforce bool, tableIDs ...int64)

	// CheckHistoricalStatsEnable checks whether historical stats is enabled.
	CheckHistoricalStatsEnable() (enable bool, err error)

	// RecordHistoricalStatsToStorage records the given table's stats data to mysql.stats_history.
	// NOTE(review): the returned uint64 is presumably the version the stats were recorded at — confirm.
	RecordHistoricalStatsToStorage(dbName string, tableInfo *model.TableInfo, physicalID int64, isPartition bool) (uint64, error)
}

StatsHistory is used to manage historical stats.

type StatsLock

// StatsLock is used to manage locked stats.
// Tables and partitions are locked and unlocked through separate methods;
// each of them reports the tables it skipped as a message string.
type StatsLock interface {
	// LockTables adds the IDs of the locked tables to the store.
	// - tables: tables that will be locked.
	// Returns the message of skipped tables and an error.
	LockTables(tables map[int64]*StatsLockTable) (skipped string, err error)

	// LockPartitions adds the IDs of the locked partitions to the store.
	// - tid: ID of the table whose partitions will be locked.
	// - tableName: name of the table whose partitions will be locked.
	// - pidNames: partitions that will be locked, keyed by partition ID.
	// Returns the message of skipped tables and an error.
	// Note: If the whole table is locked, then skip all partitions of the table.
	LockPartitions(
		tid int64,
		tableName string,
		pidNames map[int64]string,
	) (skipped string, err error)

	// RemoveLockedTables removes tables from the table locked records.
	// - tables: tables that will be unlocked.
	// Returns the message of skipped tables and an error.
	RemoveLockedTables(tables map[int64]*StatsLockTable) (skipped string, err error)

	// RemoveLockedPartitions removes partitions from the table locked records.
	// - tid: ID of the table whose partitions will be unlocked.
	// - tableName: name of the table whose partitions will be unlocked.
	// - pidNames: partitions that will be unlocked, keyed by partition ID.
	// Note: If the whole table is locked, then skip all partitions of the table.
	RemoveLockedPartitions(
		tid int64,
		tableName string,
		pidNames map[int64]string,
	) (skipped string, err error)

	// GetLockedTables returns the locked status of the given tables.
	// Note: This function queries locked tables from the store, so please try to batch the query.
	GetLockedTables(tableIDs ...int64) (map[int64]struct{}, error)

	// GetTableLockedAndClearForTest is for unit tests only.
	GetTableLockedAndClearForTest() (map[int64]struct{}, error)
}

StatsLock is used to manage locked stats.

type StatsLockTable

// StatsLockTable holds the information of a table that will be locked.
// Values of this type are passed, keyed by int64 ID, to StatsLock.LockTables
// and StatsLock.RemoveLockedTables.
type StatsLockTable struct {
	// NOTE(review): presumably partition ID -> partition name, like the
	// pidNames parameter of StatsLock.LockPartitions — confirm.
	PartitionInfo map[int64]string
	// FullName is the schema name + table name.
	FullName string
}

StatsLockTable holds the information of a table that will be locked.

type StatsReadWriter

// StatsReadWriter is used to read and write stats to the storage.
// It groups loading stats from storage, saving analyze results and meta,
// dumping stats to / loading stats from JSON, and maintaining extended stats.
// TODO: merge and remove some methods.
type StatsReadWriter interface {
	// TableStatsFromStorage loads table stats info from storage.
	TableStatsFromStorage(tableInfo *model.TableInfo, physicalID int64, loadAll bool, snapshot uint64) (statsTbl *statistics.Table, err error)

	// LoadTablePartitionStats loads partition stats info from storage.
	LoadTablePartitionStats(tableInfo *model.TableInfo, partitionDef *model.PartitionDefinition) (*statistics.Table, error)

	// StatsMetaCountAndModifyCount reads count and modify_count for the given table from mysql.stats_meta.
	StatsMetaCountAndModifyCount(tableID int64) (count, modifyCount int64, err error)

	// LoadNeededHistograms will load histograms for those needed columns/indices and put them into the cache.
	LoadNeededHistograms(is infoschema.InfoSchema) (err error)

	// ReloadExtendedStatistics drops the cache for extended statistics and reloads data from mysql.stats_extended.
	ReloadExtendedStatistics() error

	// SaveColOrIdxStatsToStorage saves the column or index stats to storage.
	SaveColOrIdxStatsToStorage(tableID int64, count, modifyCount int64, isIndex int, hg *statistics.Histogram,
		cms *statistics.CMSketch, topN *statistics.TopN, statsVersion int, updateAnalyzeTime bool, source string) (err error)

	// SaveAnalyzeResultToStorage saves the analyze result to the storage.
	SaveAnalyzeResultToStorage(results *statistics.AnalyzeResults, analyzeSnapshot bool, source string) (err error)

	// SaveMetaToStorage saves the stats meta of a table to storage.
	// Use the param `needRefreshLastHistVer` to indicate whether we need to update the last_histograms_versions in stats_meta table.
	// Set it to true if the column/index stats is updated.
	SaveMetaToStorage(source string, needRefreshLastHistVer bool, metaUpdates ...MetaUpdate) (err error)

	// UpdateStatsMetaVersionForGC updates the version of mysql.stats_meta,
	// ensuring it is greater than the last garbage collection (GC) time.
	// The GC worker deletes old stats based on a safe time point,
	// calculated as now() - 10 * max(stats lease, ddl lease).
	// The range [last GC time, safe time point) is chosen to prevent
	// the simultaneous deletion of numerous stats, minimizing potential
	// performance issues.
	// This function ensures the version is updated beyond the last GC time,
	// allowing the GC worker to delete outdated stats.
	//
	// Explanation:
	// - ddl lease: 10
	// - stats lease: 3
	// - safe time point: now() - 10 * 10 = now() - 100
	// - now: 200
	// - last GC time: 90
	// - [last GC time, safe time point) = [90, 100)
	// - To trigger stats deletion, the version must be updated beyond 90.
	//
	// This safeguards scenarios where a table remains unchanged for an extended period.
	// For instance, if a table was created at time 90, and it's now time 200,
	// with the last GC time at 95 and the safe time point at 100,
	// updating the version beyond 95 ensures eventual deletion of stats.
	UpdateStatsMetaVersionForGC(physicalID int64) (err error)

	// ChangeGlobalStatsID changes the global stats ID.
	ChangeGlobalStatsID(from, to int64) (err error)

	// TableStatsToJSON dumps table stats to JSON.
	TableStatsToJSON(dbName string, tableInfo *model.TableInfo, physicalID int64, snapshot uint64) (*statsutil.JSONTable, error)

	// DumpStatsToJSON dumps statistics to JSON.
	DumpStatsToJSON(dbName string, tableInfo *model.TableInfo,
		historyStatsExec sqlexec.RestrictedSQLExecutor, dumpPartitionStats bool) (*statsutil.JSONTable, error)

	// DumpHistoricalStatsBySnapshot dumps JSON tables from mysql.stats_meta_history and mysql.stats_history.
	// As implemented in getTableHistoricalStatsToJSONWithFallback, if historical stats are nonexistent, it will fall back
	// to the latest stats, and these table names (and partition names) will be returned in fallbackTbls.
	DumpHistoricalStatsBySnapshot(
		dbName string,
		tableInfo *model.TableInfo,
		snapshot uint64,
	) (
		jt *statsutil.JSONTable,
		fallbackTbls []string,
		err error,
	)

	// DumpStatsToJSONBySnapshot dumps statistics to JSON.
	DumpStatsToJSONBySnapshot(dbName string, tableInfo *model.TableInfo, snapshot uint64, dumpPartitionStats bool) (*statsutil.JSONTable, error)

	// PersistStatsBySnapshot dumps statistics to JSON and calls the function `persist` for each partition's statistics to persist them.
	// Notice:
	//  1. It might call the function `persist` with nil jsontable.
	//  2. It is only used by BR, so partitions' statistic are always dumped.
	PersistStatsBySnapshot(ctx context.Context, dbName string, tableInfo *model.TableInfo, snapshot uint64, persist PersistFunc) error

	// LoadStatsFromJSONConcurrently concurrently consumes the statistics tasks from `taskCh`.
	LoadStatsFromJSONConcurrently(ctx context.Context, tableInfo *model.TableInfo, taskCh chan *PartitionStatisticLoadTask, concurrencyForPartition int) error

	// LoadStatsFromJSON will load statistics from JSONTable, and save them to the storage.
	// Finally, it will also update the stats cache.
	LoadStatsFromJSON(ctx context.Context, is infoschema.InfoSchema, jsonTbl *statsutil.JSONTable, concurrencyForPartition int) error

	// LoadStatsFromJSONNoUpdate will load statistics from JSONTable, and save them to the storage.
	// NOTE(review): judging by the name and by LoadStatsFromJSON's description, this variant
	// presumably skips the final stats cache update — confirm.
	LoadStatsFromJSONNoUpdate(ctx context.Context, is infoschema.InfoSchema, jsonTbl *statsutil.JSONTable, concurrencyForPartition int) error

	// InsertExtendedStats inserts a record into mysql.stats_extended and updates the version in mysql.stats_meta.
	InsertExtendedStats(statsName string, colIDs []int64, tp int, tableID int64, ifNotExists bool) (err error)

	// MarkExtendedStatsDeleted updates the status of mysql.stats_extended to be `deleted` and the version of mysql.stats_meta.
	MarkExtendedStatsDeleted(statsName string, tableID int64, ifExists bool) (err error)

	// SaveExtendedStatsToStorage writes extended stats of a table into mysql.stats_extended.
	SaveExtendedStatsToStorage(tableID int64, extStats *statistics.ExtendedStatsColl, isLoad bool) (err error)
}

StatsReadWriter is used to read and write stats to the storage. TODO: merge and remove some methods.

type StatsSyncLoad

// StatsSyncLoad implements the sync-load feature.
// Statements submit load requests (SendLoadRequests) and wait for them
// (SyncWaitStatsLoad); the requests travel as NeededItemTask values that are
// handled by load workers (SubLoadWorker / HandleOneTask).
type StatsSyncLoad interface {
	// SendLoadRequests sends load requests to the channel.
	SendLoadRequests(sc *stmtctx.StatementContext, neededHistItems []model.StatsLoadItem, timeout time.Duration) error

	// SyncWaitStatsLoad will wait for the load requests to finish.
	SyncWaitStatsLoad(sc *stmtctx.StatementContext) error

	// AppendNeededItem appends a needed item to the channel.
	AppendNeededItem(task *NeededItemTask, timeout time.Duration) error

	// SubLoadWorker will start a goroutine to handle the load requests.
	// NOTE(review): `exit` is presumably the stop signal for the worker and
	// exitWg the group it is registered in — confirm with the implementation.
	SubLoadWorker(exit chan struct{}, exitWg *util.WaitGroupEnhancedWrapper)

	// HandleOneTask will handle one task.
	// NOTE(review): the relation between lastTask and the returned task
	// (e.g. a task handed back for retry) is not visible here — confirm.
	HandleOneTask(lastTask *NeededItemTask, exit chan struct{}) (task *NeededItemTask, err error)
}

StatsSyncLoad implements the sync-load feature.

type StatsUsage

// StatsUsage is used to track the usage of column / index statistics.
// Besides its own methods it embeds IndexUsage, so every implementation also
// provides the index usage collection methods.
type StatsUsage interface {

	// LoadColumnStatsUsage returns all columns' usage information.
	// NOTE(review): `loc` is presumably the time zone used for the returned timestamps — confirm.
	LoadColumnStatsUsage(loc *time.Location) (map[model.TableItemID]ColStatsTimeInfo, error)

	// GetPredicateColumns returns IDs of predicate columns, which are the columns whose stats are used(needed) when generating query plans.
	GetPredicateColumns(tableID int64) ([]int64, error)

	// CollectColumnsInExtendedStats returns IDs of the columns involved in extended stats.
	CollectColumnsInExtendedStats(tableID int64) ([]int64, error)

	// IndexUsage is used to collect index usage stats.
	IndexUsage

	// NewSessionStatsItem allocates a stats collector for a session.
	// TODO: the untyped `any` result is used to avoid a cyclic import; remove it once a concrete type can be returned.
	NewSessionStatsItem() any

	// ResetSessionStatsList resets the sessions stats list.
	ResetSessionStatsList()

	// DumpStatsDeltaToKV sweeps the whole list and updates the global map, then dumps every table held in the map to KV.
	DumpStatsDeltaToKV(dumpAll bool) error

	// DumpColStatsUsageToKV sweeps the whole list, updates the column stats usage map and dumps it to KV.
	DumpColStatsUsageToKV() error
}

StatsUsage is used to track the usage of column / index statistics.

type UpdateOptions

// UpdateOptions contains configuration for cache updates.
// It travels inside CacheUpdate (field Options). The zero value leaves
// SkipMoveForward false, i.e. the option is off by default.
type UpdateOptions struct {
	// SkipMoveForward controls whether to skip updating the cache's max version number.
	// When true, the cache max version number stays unchanged even after updates.
	// This improves performance when analyzing a small number of tables by avoiding
	// unnecessary full cache reloads that would normally be triggered by version changes.
	SkipMoveForward bool
}

UpdateOptions contains configuration for cache updates.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL