Documentation ¶
Index ¶
- Variables
- func GetTaskImportedRows(ctx context.Context, jobID int64) (uint64, error)
- func NewImportExecutor(ctx context.Context, id string, task *proto.Task, ...) taskexecutor.TaskExecutor
- func NewImportScheduler(ctx context.Context, task *proto.Task, param scheduler.Param, ...) scheduler.Scheduler
- func NewPostProcessStepExecutor(taskID int64, store tidbkv.Storage, taskMeta *TaskMeta, logger *zap.Logger) execute.StepExecutor
- func SubmitStandaloneTask(ctx context.Context, plan *importer.Plan, stmt string, ...) (int64, *proto.TaskBase, error)
- func SubmitTask(ctx context.Context, plan *importer.Plan, stmt string) (int64, *proto.TaskBase, error)
- func TaskKey(jobID int64) string
- type Checksum
- type Chunk
- type ImportCleanUpS3
- type ImportSchedulerExt
- func (*ImportSchedulerExt) GetEligibleInstances(_ context.Context, task *proto.Task) ([]string, error)
- func (sch *ImportSchedulerExt) GetNextStep(task *proto.TaskBase) proto.Step
- func (*ImportSchedulerExt) IsRetryableErr(err error) bool
- func (sch *ImportSchedulerExt) OnDone(ctx context.Context, handle storage.TaskHandle, task *proto.Task) error
- func (sch *ImportSchedulerExt) OnNextSubtasksBatch(ctx context.Context, taskHandle storage.TaskHandle, task *proto.Task, ...) (resSubtaskMeta [][]byte, err error)
- func (sch *ImportSchedulerExt) OnTick(ctx context.Context, task *proto.Task)
- type ImportSpec
- type ImportStepMeta
- type LogicalPlan
- type MergeSortSpec
- type MergeSortStepMeta
- type MiniTaskExecutor
- type PostProcessSpec
- type PostProcessStepMeta
- type Result
- type SharedVars
- type TaskMeta
- type WriteIngestSpec
- type WriteIngestStepMeta
Constants ¶
This section is empty.
Variables ¶
var NewTaskRegisterWithTTL = utils.NewTaskRegisterWithTTL
NewTaskRegisterWithTTL is the constructor for TaskRegister. It is exported for testing.
Functions ¶
func GetTaskImportedRows ¶
func GetTaskImportedRows(ctx context.Context, jobID int64) (uint64, error)
GetTaskImportedRows gets the number of imported rows of a job. Note: for a finished job, we can get the number of imported rows from the task meta.
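For illustration, a minimal sketch of reading the imported-row count of a job; the import path is an assumption based on TiDB's package layout.
package example

import (
	"context"
	"fmt"

	"github.com/pingcap/tidb/pkg/disttask/importinto" // assumed import path
)

func printImportedRows(ctx context.Context, jobID int64) error {
	// For a finished job this is read from the task meta; otherwise from subtasks.
	rows, err := importinto.GetTaskImportedRows(ctx, jobID)
	if err != nil {
		return err
	}
	fmt.Printf("job %d imported %d rows\n", jobID, rows)
	return nil
}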
func NewImportExecutor ¶
func NewImportExecutor(
	ctx context.Context,
	id string,
	task *proto.Task,
	taskTable taskexecutor.TaskTable,
	store tidbkv.Storage,
) taskexecutor.TaskExecutor
NewImportExecutor creates a new import task executor.
func NewImportScheduler ¶
func NewImportScheduler(
	ctx context.Context,
	task *proto.Task,
	param scheduler.Param,
	storeWithPD kv.StorageWithPD,
) scheduler.Scheduler
NewImportScheduler creates a new import scheduler.
func NewPostProcessStepExecutor ¶
func NewPostProcessStepExecutor(taskID int64, store tidbkv.Storage, taskMeta *TaskMeta, logger *zap.Logger) execute.StepExecutor
NewPostProcessStepExecutor creates a new post-process step executor. It is exported for testing.
func SubmitStandaloneTask ¶
func SubmitStandaloneTask(ctx context.Context, plan *importer.Plan, stmt string, ecp map[int32]*checkpoints.EngineCheckpoint) (int64, *proto.TaskBase, error)
SubmitStandaloneTask submits a task to the distributed framework that only runs on the current node. When importing from server disk, pass the engine checkpoints too, as the scheduler might run on another node that cannot access the data files.
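SubmitTask and TaskKey (listed in the index above) combine naturally when tracking a submitted job. A hedged sketch, with the importer.Plan construction elided and import paths assumed:
package example

import (
	"context"

	"github.com/pingcap/tidb/pkg/disttask/importinto" // assumed import path
	"github.com/pingcap/tidb/pkg/executor/importer"   // assumed import path
)

// submitAndKey is a hypothetical helper: it submits the plan to the
// distributed framework and returns the framework-level task key.
func submitAndKey(ctx context.Context, plan *importer.Plan, stmt string) (string, error) {
	jobID, _, err := importinto.SubmitTask(ctx, plan, stmt)
	if err != nil {
		return "", err
	}
	return importinto.TaskKey(jobID), nil
}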
Types ¶
type Chunk ¶
type Chunk struct {
Path string
FileSize int64
Offset int64
EndOffset int64
PrevRowIDMax int64
RowIDMax int64
Type mydump.SourceType
Compression mydump.Compression
Timestamp int64
}
Chunk records the chunk information.
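As a sketch, a single uncompressed CSV file read end to end could be described by one Chunk; the mydump import path and constants are assumptions based on TiDB Lightning's packages.
package example

import (
	"time"

	"github.com/pingcap/tidb/pkg/disttask/importinto" // assumed import path
	"github.com/pingcap/tidb/pkg/lightning/mydump"    // assumed import path
)

func wholeFileChunk(path string, size int64) importinto.Chunk {
	return importinto.Chunk{
		Path:         path,
		FileSize:     size,
		Offset:       0,
		EndOffset:    size, // read the whole file as a single chunk
		PrevRowIDMax: 0,    // row IDs for this chunk start after this value
		RowIDMax:     0,    // upper bound, assigned once rows are counted
		Type:         mydump.SourceTypeCSV,
		Compression:  mydump.CompressionNone,
		Timestamp:    time.Now().Unix(),
	}
}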
type ImportCleanUpS3 ¶
type ImportCleanUpS3 struct {
}
ImportCleanUpS3 implements scheduler.CleanUpRoutine.
type ImportSchedulerExt ¶
type ImportSchedulerExt struct {
GlobalSort bool
// contains filtered or unexported fields
}
ImportSchedulerExt is an extension of ImportScheduler, exported for testing.
func (*ImportSchedulerExt) GetEligibleInstances ¶
func (*ImportSchedulerExt) GetEligibleInstances(_ context.Context, task *proto.Task) ([]string, error)
GetEligibleInstances implements scheduler.Extension interface.
func (*ImportSchedulerExt) GetNextStep ¶
func (sch *ImportSchedulerExt) GetNextStep(task *proto.TaskBase) proto.Step
GetNextStep implements scheduler.Extension interface.
func (*ImportSchedulerExt) IsRetryableErr ¶
func (*ImportSchedulerExt) IsRetryableErr(err error) bool
IsRetryableErr implements scheduler.Extension interface.
func (*ImportSchedulerExt) OnDone ¶
func (sch *ImportSchedulerExt) OnDone(ctx context.Context, handle storage.TaskHandle, task *proto.Task) error
OnDone implements scheduler.Extension interface.
func (*ImportSchedulerExt) OnNextSubtasksBatch ¶
func (sch *ImportSchedulerExt) OnNextSubtasksBatch(
	ctx context.Context,
	taskHandle storage.TaskHandle,
	task *proto.Task,
	execIDs []string,
	nextStep proto.Step,
) (resSubtaskMeta [][]byte, err error)
OnNextSubtasksBatch generates the batch of subtask metas for the next stage.
type ImportSpec ¶
ImportSpec is the specification of an import pipeline.
func (*ImportSpec) ToSubtaskMeta ¶
func (s *ImportSpec) ToSubtaskMeta(planner.PlanCtx) ([]byte, error)
ToSubtaskMeta converts the import spec to subtask meta.
type ImportStepMeta ¶
type ImportStepMeta struct {
// This is the engine ID, not the ID in the tidb_background_subtask table.
ID int32
Chunks []Chunk
Checksum map[int64]Checksum // see KVGroupChecksum for definition of map key.
Result Result
// MaxIDs stores the max ID that has been used during encoding for each allocator type.
// The max ID is the same among all allocator types for now, since we're using the same
// base; see NewPanickingAllocators for more info.
MaxIDs map[autoid.AllocatorType]int64
SortedDataMeta *external.SortedKVMeta
// SortedIndexMetas is a map from index id to its sorted kv meta.
SortedIndexMetas map[int64]*external.SortedKVMeta
}
ImportStepMeta is the meta of the import step. The scheduler splits the task into subtasks (FileInfos -> Chunks). All fields should be serializable.
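Because every field must be serializable, a subtask meta can round-trip through JSON. A minimal sketch, with the import path assumed:
package example

import (
	"encoding/json"

	"github.com/pingcap/tidb/pkg/disttask/importinto" // assumed import path
)

// roundTrip serializes and deserializes an ImportStepMeta, as the
// framework does when shipping subtask metas between nodes.
func roundTrip(in *importinto.ImportStepMeta) (*importinto.ImportStepMeta, error) {
	bs, err := json.Marshal(in)
	if err != nil {
		return nil, err
	}
	out := new(importinto.ImportStepMeta)
	if err := json.Unmarshal(bs, out); err != nil {
		return nil, err
	}
	return out, nil
}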
type LogicalPlan ¶
type LogicalPlan struct {
JobID int64
Plan importer.Plan
Stmt string
EligibleInstances []*infosync.ServerInfo
ChunkMap map[int32][]Chunk
}
LogicalPlan represents a logical plan for IMPORT INTO.
func (*LogicalPlan) FromTaskMeta ¶
func (p *LogicalPlan) FromTaskMeta(bs []byte) error
FromTaskMeta converts the task meta to logical plan.
func (*LogicalPlan) ToPhysicalPlan ¶
func (p *LogicalPlan) ToPhysicalPlan(planCtx planner.PlanCtx) (*planner.PhysicalPlan, error)
ToPhysicalPlan converts the logical plan to physical plan.
func (*LogicalPlan) ToTaskMeta ¶
func (p *LogicalPlan) ToTaskMeta() ([]byte, error)
ToTaskMeta converts the logical plan to task meta.
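ToTaskMeta and FromTaskMeta are inverses over the serialized task meta, so a plan can be rebuilt on another node. A hedged round-trip sketch, with the import path assumed:
package example

import (
	"github.com/pingcap/tidb/pkg/disttask/importinto" // assumed import path
)

// cloneViaTaskMeta rebuilds a LogicalPlan from its serialized task meta.
func cloneViaTaskMeta(p *importinto.LogicalPlan) (*importinto.LogicalPlan, error) {
	bs, err := p.ToTaskMeta()
	if err != nil {
		return nil, err
	}
	q := new(importinto.LogicalPlan)
	if err := q.FromTaskMeta(bs); err != nil {
		return nil, err
	}
	return q, nil
}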
type MergeSortSpec ¶
type MergeSortSpec struct {
*MergeSortStepMeta
}
MergeSortSpec is the specification of a merge-sort pipeline.
func (*MergeSortSpec) ToSubtaskMeta ¶
func (s *MergeSortSpec) ToSubtaskMeta(planner.PlanCtx) ([]byte, error)
ToSubtaskMeta converts the merge-sort spec to subtask meta.
type MergeSortStepMeta ¶
type MergeSortStepMeta struct {
// KVGroup is the group name of the sorted kv, either dataKVGroup or index-id.
KVGroup string `json:"kv-group"`
DataFiles []string `json:"data-files"`
external.SortedKVMeta `json:"sorted-kv-meta"`
}
MergeSortStepMeta is the meta of the merge-sort step.
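The json tags above fix the wire format. A sketch of the encoded keys; the group name value is an assumption, since dataKVGroup is unexported, and the import path is assumed:
package example

import (
	"encoding/json"
	"fmt"

	"github.com/pingcap/tidb/pkg/disttask/importinto" // assumed import path
)

func showMergeSortMeta() error {
	m := importinto.MergeSortStepMeta{
		KVGroup:   "data", // assumption: the data KV group name; index groups use the index ID
		DataFiles: []string{"s3://bucket/sorted/part.0"},
	}
	bs, err := json.Marshal(&m)
	if err != nil {
		return err
	}
	fmt.Println(string(bs)) // keys: "kv-group", "data-files", "sorted-kv-meta"
	return nil
}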
type MiniTaskExecutor ¶
type MiniTaskExecutor interface {
Run(ctx context.Context, dataWriter, indexWriter backend.EngineWriter) error
}
MiniTaskExecutor is the interface for a minimal task executor. It is exported for testing.
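Since the interface is exported for testing, a no-op stub can replace the real executor in unit tests. A sketch, assuming the lightning backend and importinto import paths:
package example

import (
	"context"

	"github.com/pingcap/tidb/pkg/disttask/importinto" // assumed import path
	"github.com/pingcap/tidb/pkg/lightning/backend"   // assumed import path
)

// nopMiniTaskExecutor is a hypothetical stub satisfying MiniTaskExecutor.
type nopMiniTaskExecutor struct{}

var _ importinto.MiniTaskExecutor = nopMiniTaskExecutor{}

// Run does nothing; a real executor encodes rows and writes KV pairs to both writers.
func (nopMiniTaskExecutor) Run(ctx context.Context, dataWriter, indexWriter backend.EngineWriter) error {
	return nil
}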
type PostProcessSpec ¶
PostProcessSpec is the specification of a post process pipeline.
func (*PostProcessSpec) ToSubtaskMeta ¶
func (*PostProcessSpec) ToSubtaskMeta(planCtx planner.PlanCtx) ([]byte, error)
ToSubtaskMeta converts the post process spec to subtask meta.
type PostProcessStepMeta ¶
type PostProcessStepMeta struct {
// Checksum is the accumulated checksum of all subtasks in the import step. See
// KVGroupChecksum for the definition of the map key.
Checksum map[int64]Checksum
// MaxIDs is the max over all max-IDs of the subtasks in the import step.
MaxIDs map[autoid.AllocatorType]int64
}
PostProcessStepMeta is the meta of post process step.
type Result ¶
type Result struct {
LoadedRowCnt uint64
}
Result records the metrics information. This portion of the code may be implemented uniformly in the framework in the future.
type SharedVars ¶
type SharedVars struct {
// SortedIndexMetas is a map from index id to its sorted kv meta.
SortedIndexMetas map[int64]*external.SortedKVMeta
// contains filtered or unexported fields
}
SharedVars holds the variables shared by all minimal tasks in a subtask, because the subtask cannot directly obtain the results of its minimal tasks. All fields should be concurrency safe.
type TaskMeta ¶
type TaskMeta struct {
// IMPORT INTO job id, see mysql.tidb_import_jobs.
JobID int64
Plan importer.Plan
Stmt string
Result Result
// Eligible instances to run this task; we run on all instances if it's empty.
// We only need this when running IMPORT INTO without the distributed option,
// i.e. running on the instance that initiated the IMPORT INTO.
EligibleInstances []*infosync.ServerInfo
// The file chunks to import. When importing from a server file, we need to pass
// those files to the framework scheduler, which might run on another instance.
// We use a map from engine ID to chunks since we need to support split_file for
// CSV, so the files are split into engines before being passed to the scheduler.
ChunkMap map[int32][]Chunk
}
TaskMeta is the task meta of IMPORT INTO. All fields should be serializable.
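A sketch of a TaskMeta carrying pre-split chunks keyed by engine ID; values are illustrative and import paths assumed.
package example

import (
	"github.com/pingcap/tidb/pkg/disttask/importinto" // assumed import path
	"github.com/pingcap/tidb/pkg/executor/importer"   // assumed import path
)

func serverFileTaskMeta(jobID int64, plan importer.Plan, chunks []importinto.Chunk) *importinto.TaskMeta {
	return &importinto.TaskMeta{
		JobID: jobID,
		Plan:  plan,
		Stmt:  "IMPORT INTO t FROM '/data/t.*.csv'",
		// All chunks go to engine 0 here; with split_file the chunks of one
		// CSV file are spread across engines before reaching the scheduler.
		ChunkMap: map[int32][]importinto.Chunk{0: chunks},
	}
}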
type WriteIngestSpec ¶
type WriteIngestSpec struct {
*WriteIngestStepMeta
}
WriteIngestSpec is the specification of a write-ingest pipeline.
func (*WriteIngestSpec) ToSubtaskMeta ¶
func (s *WriteIngestSpec) ToSubtaskMeta(planner.PlanCtx) ([]byte, error)
ToSubtaskMeta converts the write-ingest spec to subtask meta.
type WriteIngestStepMeta ¶
type WriteIngestStepMeta struct {
KVGroup string `json:"kv-group"`
external.SortedKVMeta `json:"sorted-kv-meta"`
DataFiles []string `json:"data-files"`
StatFiles []string `json:"stat-files"`
RangeJobKeys [][]byte `json:"range-job-keys"`
RangeSplitKeys [][]byte `json:"range-split-keys"`
TS uint64 `json:"ts"`
Result Result
}
WriteIngestStepMeta is the meta of the write-and-ingest step. It is only used when global sort is enabled.