importinto

package
v1.1.0-beta.0...-ab5c2a5 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 30, 2026 License: Apache-2.0 Imports: 71 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
// MaxConflictRowFileSize is the maximum size of the conflict row file,
// initialized to 8 GiB. It is a variable and is exported for testing.
var MaxConflictRowFileSize int64 = 8 * units.GiB

MaxConflictRowFileSize is the maximum size of the conflict row file. Exported for testing.

View Source
// NewTaskRegisterWithTTL is the ctor for TaskRegister; it is bound to
// utils.NewTaskRegisterWithTTL. It is exported for testing.
var NewTaskRegisterWithTTL = utils.NewTaskRegisterWithTTL

NewTaskRegisterWithTTL is the ctor for TaskRegister. It is exported for testing.

Functions

func GetTaskImportedRows

func GetTaskImportedRows(ctx context.Context, jobID int64) (uint64, error)

GetTaskImportedRows gets the number of imported rows of a job. Note: for a finished job, we can get the number of imported rows from the task meta.

func IndexID2KVGroup

func IndexID2KVGroup(indexID int64) string

IndexID2KVGroup converts an index ID to its KV group name. Exported for testing.

func NewImportExecutor

func NewImportExecutor(
	ctx context.Context,
	task *proto.Task,
	param taskexecutor.Param,
	store tidbkv.Storage,
) taskexecutor.TaskExecutor

NewImportExecutor creates a new import task executor.

func NewImportScheduler

func NewImportScheduler(
	ctx context.Context,
	task *proto.Task,
	param scheduler.Param,
	storeWithPD kv.StorageWithPD,
) scheduler.Scheduler

NewImportScheduler creates a new import scheduler.

func NewImportSchedulerForTest

func NewImportSchedulerForTest(globalSort bool) scheduler.Scheduler

NewImportSchedulerForTest creates a new import scheduler for test.

func NewPostProcessStepExecutor

func NewPostProcessStepExecutor(taskID int64, store tidbkv.Storage, taskMeta *TaskMeta, logger *zap.Logger) execute.StepExecutor

NewPostProcessStepExecutor creates a new post process step executor. Exported for testing.

func SubmitStandaloneTask

func SubmitStandaloneTask(ctx context.Context, plan *importer.Plan, stmt string, ecp map[int32]*checkpoints.EngineCheckpoint) (int64, *proto.TaskBase, error)

SubmitStandaloneTask submits a task to the distributed framework that only runs on the current node. When importing from server disk, pass the engine checkpoints too, as the scheduler might run on another node where we can't access the data files.

func SubmitTask

func SubmitTask(ctx context.Context, plan *importer.Plan, stmt string) (int64, *proto.TaskBase, error)

SubmitTask submits a task to the distributed framework that runs on all managed nodes.

func TaskKey

func TaskKey(jobID int64) string

TaskKey returns the task key for a job.

Types

type Checksum

// Checksum records the checksum information.
// It can be converted to a verification.KVChecksum via ToKVChecksum.
// NOTE(review): Sum, KVs and Size presumably hold the checksum value, the
// number of KV pairs and the total byte size respectively — confirm against
// verification.KVChecksum.
type Checksum struct {
	Sum  uint64
	KVs  uint64
	Size uint64
}

Checksum records the checksum information.

func (*Checksum) ToKVChecksum

func (c *Checksum) ToKVChecksum() *verification.KVChecksum

ToKVChecksum converts the Checksum to verification.KVChecksum.

type Chunk

// Chunk records the chunk information.
// NOTE(review): Offset/EndOffset presumably delimit the chunk's byte range
// inside the file at Path, and PrevRowIDMax/RowIDMax the row ID range
// assigned to it — confirm against the code that builds chunks.
type Chunk struct {
	Path         string
	FileSize     int64
	Offset       int64
	EndOffset    int64
	PrevRowIDMax int64
	RowIDMax     int64
	Type         mydump.SourceType
	Compression  mydump.Compression
	Timestamp    int64
}

Chunk records the chunk information.

type CollectConflictsSpec

// CollectConflictsSpec is the specification of a collect conflicts pipeline.
// It embeds *CollectConflictsStepMeta, which ToSubtaskMeta turns into the
// subtask meta bytes.
type CollectConflictsSpec struct {
	*CollectConflictsStepMeta
}

CollectConflictsSpec is the specification of a collect conflicts pipeline.

func (*CollectConflictsSpec) ToSubtaskMeta

func (s *CollectConflictsSpec) ToSubtaskMeta(planner.PlanCtx) ([]byte, error)

ToSubtaskMeta converts the collect conflicts spec to subtask meta.

type CollectConflictsStepMeta

// CollectConflictsStepMeta is the meta of collect conflicts step.
// It is marshaled to JSON by Marshal.
// NOTE(review): fields tagged `external:"true"` are presumably the ones
// handled through external.BaseExternalMeta — confirm.
type CollectConflictsStepMeta struct {
	external.BaseExternalMeta
	Infos                   KVGroupConflictInfos `json:"infos" external:"true"`
	RecordedDataKVConflicts int64                `json:"recorded-data-kv-conflicts,omitempty"`
	// Checksum is the checksum of all conflicted rows.
	Checksum *Checksum `json:"checksum,omitempty"`
	// ConflictedRowCount is the count of all conflicted rows.
	ConflictedRowCount int64 `json:"conflicted-row-count,omitempty"`
	// ConflictedRowFilenames is the filenames of all conflicted rows.
	// Note: these files are for the user to resolve conflicts manually.
	ConflictedRowFilenames []string `json:"conflicted-row-filenames,omitempty"`
	// TooManyConflictsFromIndex is true if there are too many conflicts from index.
	// If true, we will skip checksum.
	TooManyConflictsFromIndex bool `json:"too-many-conflicts-from-index,omitempty"`
}

CollectConflictsStepMeta is the meta of collect conflicts step.

func (*CollectConflictsStepMeta) Marshal

func (m *CollectConflictsStepMeta) Marshal() ([]byte, error)

Marshal marshals the collect conflicts step meta to JSON.

type ConflictResolutionSpec

// ConflictResolutionSpec is the specification of a conflict resolution
// pipeline. It embeds *ConflictResolutionStepMeta, which ToSubtaskMeta turns
// into the subtask meta bytes.
type ConflictResolutionSpec struct {
	*ConflictResolutionStepMeta
}

ConflictResolutionSpec is the specification of a conflict resolution pipeline.

func (*ConflictResolutionSpec) ToSubtaskMeta

func (s *ConflictResolutionSpec) ToSubtaskMeta(planner.PlanCtx) ([]byte, error)

ToSubtaskMeta converts the conflict resolution spec to subtask meta.

type ConflictResolutionStepMeta

// ConflictResolutionStepMeta is the meta of conflict resolution step.
// It is marshaled to JSON by Marshal.
type ConflictResolutionStepMeta struct {
	external.BaseExternalMeta
	// Infos holds the conflict infos, serialized under the "infos" JSON key.
	Infos KVGroupConflictInfos `json:"infos" external:"true"`
}

ConflictResolutionStepMeta is the meta of conflict resolution step.

func (*ConflictResolutionStepMeta) Marshal

func (m *ConflictResolutionStepMeta) Marshal() ([]byte, error)

Marshal marshals the conflict resolution step meta to JSON.

type ImportCleanUpS3

// ImportCleanUpS3 implements scheduler.CleanUpRoutine.
// It has no fields; its behavior lives in the CleanUp method.
type ImportCleanUpS3 struct {
}

ImportCleanUpS3 implements scheduler.CleanUpRoutine.

func (*ImportCleanUpS3) CleanUp

func (*ImportCleanUpS3) CleanUp(ctx context.Context, task *proto.Task) error

CleanUp implements the CleanUpRoutine.CleanUp interface.

type ImportSpec

// ImportSpec is the specification of an import pipeline.
// It embeds *ImportStepMeta and carries the importer.Plan; ToSubtaskMeta
// converts it to subtask meta.
type ImportSpec struct {
	*ImportStepMeta
	Plan importer.Plan
}

ImportSpec is the specification of an import pipeline.

func (*ImportSpec) ToSubtaskMeta

func (s *ImportSpec) ToSubtaskMeta(planner.PlanCtx) ([]byte, error)

ToSubtaskMeta converts the import spec to subtask meta.

type ImportStepMeta

// ImportStepMeta is the meta of the import step.
// The scheduler splits the task into subtasks (FileInfos -> Chunks).
// All fields should be serializable; Marshal encodes the meta to JSON.
type ImportStepMeta struct {
	external.BaseExternalMeta
	// ID is the engine ID, not the id in the tidb_background_subtask table.
	ID       int32
	Chunks   []Chunk            `external:"true"`
	Checksum map[int64]Checksum // see KVGroupChecksum for the definition of the map key.
	Result   Result
	// MaxIDs stores the max ID that has been used during encoding for each allocator type.
	// The max ID is the same among all allocator types for now, since we're using the
	// same base; see NewPanickingAllocators for more info.
	MaxIDs map[autoid.AllocatorType]int64

	SortedDataMeta *external.SortedKVMeta `external:"true"`
	// SortedIndexMetas is a map from index id to its sorted kv meta.
	SortedIndexMetas map[int64]*external.SortedKVMeta `external:"true"`
	// RecordedConflictKVCount is the sum of all conflict KVs in all SortedKVMeta;
	// we keep it here to avoid getting the external meta when there are no conflict KVs.
	RecordedConflictKVCount uint64
}

ImportStepMeta is the meta of the import step. The scheduler splits the task into subtasks (FileInfos -> Chunks). All fields should be serializable.

func (*ImportStepMeta) Marshal

func (m *ImportStepMeta) Marshal() ([]byte, error)

Marshal marshals the import step meta to JSON.

type KVGroupConflictInfos

// KVGroupConflictInfos is the conflict infos of a kv group.
type KVGroupConflictInfos struct {
	// ConflictInfos maps a string key to its conflict info.
	// NOTE(review): the key is presumably the kv group name (see
	// IndexID2KVGroup) — confirm.
	ConflictInfos map[string]*common.ConflictInfo `json:"conflict-infos,omitempty"`
}

KVGroupConflictInfos is the conflict infos of a kv group.

type LogicalPlan

// LogicalPlan represents a logical plan for import into.
// It converts to and from task meta bytes via ToTaskMeta/FromTaskMeta, and to
// a physical plan via ToPhysicalPlan.
// NOTE(review): the fields have the same names and types as the matching
// TaskMeta fields (JobID, Plan, Stmt, EligibleInstances, ChunkMap) and
// presumably carry the same meaning — confirm.
type LogicalPlan struct {
	JobID             int64
	Plan              importer.Plan
	Stmt              string
	EligibleInstances []*infosync.ServerInfo
	ChunkMap          map[int32][]Chunk
}

LogicalPlan represents a logical plan for import into.

func (*LogicalPlan) FromTaskMeta

func (p *LogicalPlan) FromTaskMeta(bs []byte) error

FromTaskMeta converts the task meta to logical plan.

func (*LogicalPlan) ToPhysicalPlan

func (p *LogicalPlan) ToPhysicalPlan(planCtx planner.PlanCtx) (*planner.PhysicalPlan, error)

ToPhysicalPlan converts the logical plan to physical plan.

func (*LogicalPlan) ToTaskMeta

func (p *LogicalPlan) ToTaskMeta() ([]byte, error)

ToTaskMeta converts the logical plan to task meta.

type MergeSortSpec

// MergeSortSpec is the specification of a merge-sort pipeline.
// It embeds *MergeSortStepMeta, which ToSubtaskMeta turns into the subtask
// meta bytes.
type MergeSortSpec struct {
	*MergeSortStepMeta
}

MergeSortSpec is the specification of a merge-sort pipeline.

func (*MergeSortSpec) ToSubtaskMeta

func (s *MergeSortSpec) ToSubtaskMeta(planner.PlanCtx) ([]byte, error)

ToSubtaskMeta converts the merge-sort spec to subtask meta.

type MergeSortStepMeta

// MergeSortStepMeta is the meta of merge sort step.
// It is marshaled to JSON by Marshal.
type MergeSortStepMeta struct {
	external.BaseExternalMeta
	// KVGroup is the group name of the sorted kv, either dataKVGroup or index-id.
	KVGroup                 string   `json:"kv-group"`
	DataFiles               []string `json:"data-files" external:"true"`
	external.SortedKVMeta   `external:"true"`
	RecordedConflictKVCount uint64 `json:"recorded-conflict-kv-count,omitempty"`
}

MergeSortStepMeta is the meta of merge sort step.

func (*MergeSortStepMeta) Marshal

func (m *MergeSortStepMeta) Marshal() ([]byte, error)

Marshal marshals the merge sort step meta to JSON.

type MiniTaskExecutor

// MiniTaskExecutor is the interface for a minimal task executor.
// Exported for testing.
type MiniTaskExecutor interface {
	// Run executes the minimal task; it receives the engine writers for data
	// and for index, and reports failure through the returned error.
	Run(ctx context.Context, dataWriter, indexWriter backend.EngineWriter) error
}

MiniTaskExecutor is the interface for a minimal task executor. Exported for testing.

type PostProcessSpec

// PostProcessSpec is the specification of a post process pipeline.
// ToSubtaskMeta converts it to subtask meta.
type PostProcessSpec struct {
	// Schema and Table are for the checksum request.
	Schema string
	Table  string
}

PostProcessSpec is the specification of a post process pipeline.

func (*PostProcessSpec) ToSubtaskMeta

func (*PostProcessSpec) ToSubtaskMeta(planCtx planner.PlanCtx) ([]byte, error)

ToSubtaskMeta converts the post process spec to subtask meta.

type PostProcessStepMeta

// PostProcessStepMeta is the meta of post process step.
type PostProcessStepMeta struct {
	// Checksum is the accumulated checksum of all subtasks in the encode step.
	// See KVGroupChecksum for the definition of the map key.
	Checksum map[int64]Checksum
	// DeletedRowsChecksum is the checksum of all deleted rows due to conflicts.
	DeletedRowsChecksum Checksum
	// TooManyConflictsFromIndex is true if there are too many conflicts from index.
	// If true, the DeletedRowsChecksum might not be accurate, so we will skip checksum.
	TooManyConflictsFromIndex bool `json:"too-many-conflicts-from-index,omitempty"`
	// MaxIDs is the max of all max-ids of the subtasks in the import step.
	MaxIDs map[autoid.AllocatorType]int64
}

PostProcessStepMeta is the meta of post process step.

type Result

// Result records the metrics information.
// This portion of the code may be implemented uniformly in the framework in
// the future.
// NOTE(review): LoadedRowCnt and ConflictedRowCnt are presumably the numbers
// of loaded rows and conflicted rows — confirm against the code that fills them.
type Result struct {
	LoadedRowCnt     uint64
	ConflictedRowCnt uint64
}

Result records the metrics information. This portion of the code may be implemented uniformly in the framework in the future.

type SharedVars

// SharedVars holds the variables shared by all minimal tasks in a subtask.
// It exists because a subtask cannot directly obtain the results of its
// minimal tasks. All the fields should be concurrency-safe.
// NOTE(review): ShareMu presumably guards the sorted-meta fields and
// RecordedConflictKVCount — confirm which fields it protects.
type SharedVars struct {
	TableImporter *importer.TableImporter
	DataEngine    *backend.OpenedEngine
	IndexEngine   *backend.OpenedEngine

	Checksum *verification.KVGroupChecksum

	SortedDataMeta *external.SortedKVMeta
	// SortedIndexMetas is a map from index id to its sorted kv meta.
	SortedIndexMetas        map[int64]*external.SortedKVMeta
	RecordedConflictKVCount uint64
	ShareMu                 sync.Mutex
	// contains filtered or unexported fields
}

SharedVars holds the variables shared by all minimal tasks in a subtask. It exists because a subtask cannot directly obtain the results of its minimal tasks. All the fields should be concurrency-safe.

type TaskMeta

// TaskMeta is the task meta of IMPORT INTO.
// All fields should be serializable.
type TaskMeta struct {
	// JobID is the IMPORT INTO job id, see mysql.tidb_import_jobs.
	JobID  int64
	Plan   importer.Plan
	Stmt   string
	Result Result
	// EligibleInstances are the instances eligible to run this task; we run on
	// all instances if it's empty. For now we only need this when running
	// IMPORT INTO without the distributed option, i.e. running on the instance
	// that initiates the IMPORT INTO.
	EligibleInstances []*infosync.ServerInfo
	// ChunkMap holds the file chunks to import. When importing from server
	// files, we need to pass those files to the framework scheduler, which
	// might run on another instance. We use a map from engine ID to chunks
	// since we need to support split_file for CSV, so the chunks have to be
	// split into engines before being passed to the scheduler.
	ChunkMap map[int32][]Chunk
}

TaskMeta is the task meta of IMPORT INTO. All fields should be serializable.

type WriteIngestSpec

// WriteIngestSpec is the specification of a write-ingest pipeline.
// It embeds *WriteIngestStepMeta, which ToSubtaskMeta turns into the subtask
// meta bytes.
type WriteIngestSpec struct {
	*WriteIngestStepMeta
}

WriteIngestSpec is the specification of a write-ingest pipeline.

func (*WriteIngestSpec) ToSubtaskMeta

func (s *WriteIngestSpec) ToSubtaskMeta(planner.PlanCtx) ([]byte, error)

ToSubtaskMeta converts the write-ingest spec to subtask meta.

type WriteIngestStepMeta

// WriteIngestStepMeta is the meta of write and ingest step.
// Only used when global sort is enabled. It is marshaled to JSON by Marshal.
// NOTE(review): KVGroup presumably follows the same convention as
// MergeSortStepMeta.KVGroup (dataKVGroup or index-id) — confirm.
type WriteIngestStepMeta struct {
	external.BaseExternalMeta
	KVGroup                 string `json:"kv-group"`
	external.SortedKVMeta   `json:"sorted-kv-meta" external:"true"`
	RecordedConflictKVCount uint64   `json:"recorded-conflict-kv-count,omitempty"`
	DataFiles               []string `json:"data-files" external:"true"`
	StatFiles               []string `json:"stat-files" external:"true"`
	RangeJobKeys            [][]byte `json:"range-job-keys" external:"true"`
	RangeSplitKeys          [][]byte `json:"range-split-keys" external:"true"`
	TS                      uint64   `json:"ts"`

	Result Result
}

WriteIngestStepMeta is the meta of the write and ingest step. Only used when global sort is enabled.

func (*WriteIngestStepMeta) Marshal

func (m *WriteIngestStepMeta) Marshal() ([]byte, error)

Marshal marshals the write ingest step meta to JSON.

Directories

Path Synopsis
Package mock is a generated GoMock package.
Package mock is a generated GoMock package.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL