Documentation
¶
Index ¶
- Variables
- func GetTaskImportedRows(ctx context.Context, jobID int64) (uint64, error)
- func IndexID2KVGroup(indexID int64) string
- func NewImportExecutor(ctx context.Context, task *proto.Task, param taskexecutor.Param, ...) taskexecutor.TaskExecutor
- func NewImportScheduler(ctx context.Context, task *proto.Task, param scheduler.Param, ...) scheduler.Scheduler
- func NewImportSchedulerForTest(globalSort bool) scheduler.Scheduler
- func NewPostProcessStepExecutor(taskID int64, store tidbkv.Storage, taskMeta *TaskMeta, logger *zap.Logger) execute.StepExecutor
- func SubmitStandaloneTask(ctx context.Context, plan *importer.Plan, stmt string, ...) (int64, *proto.TaskBase, error)
- func SubmitTask(ctx context.Context, plan *importer.Plan, stmt string) (int64, *proto.TaskBase, error)
- func TaskKey(jobID int64) string
- type Checksum
- type Chunk
- type CollectConflictsSpec
- type CollectConflictsStepMeta
- type ConflictResolutionSpec
- type ConflictResolutionStepMeta
- type ImportCleanUpS3
- type ImportSpec
- type ImportStepMeta
- type KVGroupConflictInfos
- type LogicalPlan
- type MergeSortSpec
- type MergeSortStepMeta
- type MiniTaskExecutor
- type PostProcessSpec
- type PostProcessStepMeta
- type Result
- type SharedVars
- type TaskMeta
- type WriteIngestSpec
- type WriteIngestStepMeta
Constants ¶
This section is empty.
Variables ¶
var MaxConflictRowFileSize int64 = 8 * units.GiB
MaxConflictRowFileSize is the maximum size of the conflict row file. Exported for testing.
var NewTaskRegisterWithTTL = utils.NewTaskRegisterWithTTL
NewTaskRegisterWithTTL is the ctor for TaskRegister. It is exported for testing.
Functions ¶
func GetTaskImportedRows ¶
GetTaskImportedRows gets the number of imported rows of a job. Note: for a finished job, we can get the number of imported rows from task meta.
func IndexID2KVGroup ¶
IndexID2KVGroup converts index id to kv group name. Exported for testing.
func NewImportExecutor ¶
func NewImportExecutor( ctx context.Context, task *proto.Task, param taskexecutor.Param, store tidbkv.Storage, ) taskexecutor.TaskExecutor
NewImportExecutor creates a new import task executor.
func NewImportScheduler ¶
func NewImportScheduler( ctx context.Context, task *proto.Task, param scheduler.Param, storeWithPD kv.StorageWithPD, ) scheduler.Scheduler
NewImportScheduler creates a new import scheduler.
func NewImportSchedulerForTest ¶
NewImportSchedulerForTest creates a new import scheduler for test.
func NewPostProcessStepExecutor ¶
func NewPostProcessStepExecutor(taskID int64, store tidbkv.Storage, taskMeta *TaskMeta, logger *zap.Logger) execute.StepExecutor
NewPostProcessStepExecutor creates a new post process step executor. Exported for testing.
func SubmitStandaloneTask ¶
func SubmitStandaloneTask(ctx context.Context, plan *importer.Plan, stmt string, ecp map[int32]*checkpoints.EngineCheckpoint) (int64, *proto.TaskBase, error)
SubmitStandaloneTask submits a task to the distributed framework that only runs on the current node. When importing from server disk, pass engine checkpoints too, as the scheduler might run on another node where we can't access the data files.
Types ¶
type Checksum ¶
Checksum records the checksum information.
func (*Checksum) ToKVChecksum ¶
func (c *Checksum) ToKVChecksum() *verification.KVChecksum
ToKVChecksum converts the Checksum to verification.KVChecksum.
type Chunk ¶
type Chunk struct {
Path string
FileSize int64
Offset int64
EndOffset int64
PrevRowIDMax int64
RowIDMax int64
Type mydump.SourceType
Compression mydump.Compression
Timestamp int64
}
Chunk records the chunk information.
type CollectConflictsSpec ¶
type CollectConflictsSpec struct {
*CollectConflictsStepMeta
}
CollectConflictsSpec is the specification of a collect conflicts pipeline.
func (*CollectConflictsSpec) ToSubtaskMeta ¶
func (s *CollectConflictsSpec) ToSubtaskMeta(planner.PlanCtx) ([]byte, error)
ToSubtaskMeta converts the collect conflicts spec to subtask meta.
type CollectConflictsStepMeta ¶
type CollectConflictsStepMeta struct {
external.BaseExternalMeta
Infos KVGroupConflictInfos `json:"infos" external:"true"`
RecordedDataKVConflicts int64 `json:"recorded-data-kv-conflicts,omitempty"`
// Checksum is the checksum of all conflicts rows.
Checksum *Checksum `json:"checksum,omitempty"`
// ConflictedRowCount is the count of all conflicted rows.
ConflictedRowCount int64 `json:"conflicted-row-count,omitempty"`
// ConflictedRowFilenames is the filenames of all conflicted rows.
// Note: this file is for user to resolve conflicts manually.
ConflictedRowFilenames []string `json:"conflicted-row-filenames,omitempty"`
// TooManyConflictsFromIndex is true if there are too many conflicts from index.
// if true, we will skip checksum.
TooManyConflictsFromIndex bool `json:"too-many-conflicts-from-index,omitempty"`
}
CollectConflictsStepMeta is the meta of collect conflicts step.
func (*CollectConflictsStepMeta) Marshal ¶
func (m *CollectConflictsStepMeta) Marshal() ([]byte, error)
Marshal marshals the collect conflicts step meta to JSON.
type ConflictResolutionSpec ¶
type ConflictResolutionSpec struct {
*ConflictResolutionStepMeta
}
ConflictResolutionSpec is the specification of a conflict resolution pipeline.
func (*ConflictResolutionSpec) ToSubtaskMeta ¶
func (s *ConflictResolutionSpec) ToSubtaskMeta(planner.PlanCtx) ([]byte, error)
ToSubtaskMeta converts the conflict resolution spec to subtask meta.
type ConflictResolutionStepMeta ¶
type ConflictResolutionStepMeta struct {
external.BaseExternalMeta
Infos KVGroupConflictInfos `json:"infos" external:"true"`
}
ConflictResolutionStepMeta is the meta of conflict resolution step.
func (*ConflictResolutionStepMeta) Marshal ¶
func (m *ConflictResolutionStepMeta) Marshal() ([]byte, error)
Marshal marshals the conflict resolution step meta to JSON.
type ImportCleanUpS3 ¶
type ImportCleanUpS3 struct {
}
ImportCleanUpS3 implements scheduler.CleanUpRoutine.
type ImportSpec ¶
type ImportSpec struct {
*ImportStepMeta
Plan importer.Plan
}
ImportSpec is the specification of an import pipeline.
func (*ImportSpec) ToSubtaskMeta ¶
func (s *ImportSpec) ToSubtaskMeta(planner.PlanCtx) ([]byte, error)
ToSubtaskMeta converts the import spec to subtask meta.
type ImportStepMeta ¶
type ImportStepMeta struct {
external.BaseExternalMeta
// this is the engine ID, not the id in tidb_background_subtask table.
ID int32
Chunks []Chunk `external:"true"`
Checksum map[int64]Checksum // see KVGroupChecksum for definition of map key.
Result Result
// MaxIDs stores the max id that has been used during encoding for each allocator type.
// the max id is same among all allocator types for now, since we're using same base, see
// NewPanickingAllocators for more info.
MaxIDs map[autoid.AllocatorType]int64
SortedDataMeta *external.SortedKVMeta `external:"true"`
// SortedIndexMetas is a map from index id to its sorted kv meta.
SortedIndexMetas map[int64]*external.SortedKVMeta `external:"true"`
// it's the sum of all conflict KVs in all SortedKVMeta, we keep it here to
// avoid getting the external meta when there are no conflict KVs.
RecordedConflictKVCount uint64
}
ImportStepMeta is the meta of import step. Scheduler will split the task into subtasks (FileInfos -> Chunks). All the fields should be serializable.
func (*ImportStepMeta) Marshal ¶
func (m *ImportStepMeta) Marshal() ([]byte, error)
Marshal marshals the import step meta to JSON.
type KVGroupConflictInfos ¶
type KVGroupConflictInfos struct {
ConflictInfos map[string]*common.ConflictInfo `json:"conflict-infos,omitempty"`
}
KVGroupConflictInfos is the conflict infos of a kv group.
type LogicalPlan ¶
type LogicalPlan struct {
JobID int64
Plan importer.Plan
Stmt string
EligibleInstances []*infosync.ServerInfo
ChunkMap map[int32][]Chunk
}
LogicalPlan represents a logical plan for import into.
func (*LogicalPlan) FromTaskMeta ¶
func (p *LogicalPlan) FromTaskMeta(bs []byte) error
FromTaskMeta converts the task meta to logical plan.
func (*LogicalPlan) ToPhysicalPlan ¶
func (p *LogicalPlan) ToPhysicalPlan(planCtx planner.PlanCtx) (*planner.PhysicalPlan, error)
ToPhysicalPlan converts the logical plan to physical plan.
func (*LogicalPlan) ToTaskMeta ¶
func (p *LogicalPlan) ToTaskMeta() ([]byte, error)
ToTaskMeta converts the logical plan to task meta.
type MergeSortSpec ¶
type MergeSortSpec struct {
*MergeSortStepMeta
}
MergeSortSpec is the specification of a merge-sort pipeline.
func (*MergeSortSpec) ToSubtaskMeta ¶
func (s *MergeSortSpec) ToSubtaskMeta(planner.PlanCtx) ([]byte, error)
ToSubtaskMeta converts the merge-sort spec to subtask meta.
type MergeSortStepMeta ¶
type MergeSortStepMeta struct {
external.BaseExternalMeta
// KVGroup is the group name of the sorted kv, either dataKVGroup or index-id.
KVGroup string `json:"kv-group"`
DataFiles []string `json:"data-files" external:"true"`
external.SortedKVMeta `external:"true"`
RecordedConflictKVCount uint64 `json:"recorded-conflict-kv-count,omitempty"`
}
MergeSortStepMeta is the meta of merge sort step.
func (*MergeSortStepMeta) Marshal ¶
func (m *MergeSortStepMeta) Marshal() ([]byte, error)
Marshal marshals the merge sort step meta to JSON.
type MiniTaskExecutor ¶
type MiniTaskExecutor interface {
Run(ctx context.Context, dataWriter, indexWriter backend.EngineWriter) error
}
MiniTaskExecutor is the interface for a minimal task executor. Exported for testing.
type PostProcessSpec ¶
PostProcessSpec is the specification of a post process pipeline.
func (*PostProcessSpec) ToSubtaskMeta ¶
func (*PostProcessSpec) ToSubtaskMeta(planCtx planner.PlanCtx) ([]byte, error)
ToSubtaskMeta converts the post process spec to subtask meta.
type PostProcessStepMeta ¶
type PostProcessStepMeta struct {
// accumulated checksum of all subtasks in encode step. See KVGroupChecksum
// for definition of map key.
Checksum map[int64]Checksum
// DeletedRowsChecksum is the checksum of all deleted rows due to conflicts.
DeletedRowsChecksum Checksum
// TooManyConflictsFromIndex is true if there are too many conflicts from index.
// if true, the DeletedRowsChecksum might not be accurate, we will skip checksum.
TooManyConflictsFromIndex bool `json:"too-many-conflicts-from-index,omitempty"`
// MaxIDs is the max of all max-ids of subtasks in import step.
MaxIDs map[autoid.AllocatorType]int64
}
PostProcessStepMeta is the meta of post process step.
type Result ¶
Result records the metrics information. This portion of the code may be implemented uniformly in the framework in the future.
type SharedVars ¶
type SharedVars struct {
// SortedIndexMetas is a map from index id to its sorted kv meta.
// contains filtered or unexported fields
}
SharedVars is the shared variables of all minimal tasks in a subtask. This is because subtasks cannot directly obtain the results of the minimal tasks. All the fields should be concurrent safe.
type TaskMeta ¶
type TaskMeta struct {
// IMPORT INTO job id, see mysql.tidb_import_jobs.
JobID int64
Plan importer.Plan
Stmt string
Result Result
// eligible instances to run this task, we run on all instances if it's empty.
// we only need this when running IMPORT INTO without the distributed option now, i.e.
// running on the instance that initiates the IMPORT INTO.
EligibleInstances []*infosync.ServerInfo
// the file chunks to import, when importing from server files, we need to pass those
// files to the framework scheduler which might run on another instance.
// we use a map from engine ID to chunks since we need to support split_file for CSV,
// so we need to split them into engines before passing to the scheduler.
ChunkMap map[int32][]Chunk
}
TaskMeta is the task meta of IMPORT INTO. All the fields should be serializable.
type WriteIngestSpec ¶
type WriteIngestSpec struct {
*WriteIngestStepMeta
}
WriteIngestSpec is the specification of a write-ingest pipeline.
func (*WriteIngestSpec) ToSubtaskMeta ¶
func (s *WriteIngestSpec) ToSubtaskMeta(planner.PlanCtx) ([]byte, error)
ToSubtaskMeta converts the write-ingest spec to subtask meta.
type WriteIngestStepMeta ¶
type WriteIngestStepMeta struct {
external.BaseExternalMeta
KVGroup string `json:"kv-group"`
external.SortedKVMeta `json:"sorted-kv-meta" external:"true"`
RecordedConflictKVCount uint64 `json:"recorded-conflict-kv-count,omitempty"`
DataFiles []string `json:"data-files" external:"true"`
StatFiles []string `json:"stat-files" external:"true"`
RangeJobKeys [][]byte `json:"range-job-keys" external:"true"`
RangeSplitKeys [][]byte `json:"range-split-keys" external:"true"`
TS uint64 `json:"ts"`
Result Result
}
WriteIngestStepMeta is the meta of write and ingest step. Only used when global sort is enabled.
func (*WriteIngestStepMeta) Marshal ¶
func (m *WriteIngestStepMeta) Marshal() ([]byte, error)
Marshal marshals the write ingest step meta to JSON.