importer

package
v0.5.0 (Latest)
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 31, 2025 | License: Apache-2.0 | Imports: 17 | Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type AvroImporter added in v0.5.0

type AvroImporter struct {
	// contains filtered or unexported fields
}

AvroImporter handles importing Avro files into Iceberg tables

func NewAvroImporter added in v0.5.0

func NewAvroImporter(cfg *config.Config) (*AvroImporter, error)

NewAvroImporter creates a new Avro importer

func (*AvroImporter) Close added in v0.5.0

func (a *AvroImporter) Close() error

Close closes the importer and releases resources

func (*AvroImporter) GetTableLocation added in v0.5.0

func (a *AvroImporter) GetTableLocation(tableIdent table.Identifier) string

GetTableLocation returns the location where table data would be stored

func (*AvroImporter) ImportTable added in v0.5.0

func (a *AvroImporter) ImportTable(ctx context.Context, req ImportRequest) (*ImportResult, error)

ImportTable imports an Avro file into an Iceberg table

func (*AvroImporter) InferSchema added in v0.5.0

func (a *AvroImporter) InferSchema(avroFile string) (*Schema, *FileStats, error)

InferSchema reads an Avro file and infers the schema

type Field

// Field describes a single column of a Schema. Each member is serialized to
// JSON under the lower-case key given in its struct tag.
type Field struct {
	// Name is the column name (JSON key "name").
	Name     string `json:"name"`
	// Type is the column type expressed as a string (JSON key "type").
	// NOTE(review): the set of valid type names is not visible here — confirm
	// against the importer implementations.
	Type     string `json:"type"`
	// Nullable is serialized under the JSON key "nullable"; presumably true
	// when the column may contain null values — confirm.
	Nullable bool   `json:"nullable"`
}

Field represents a single column in a schema

type FileStats

// FileStats holds summary statistics about an input file. It is the second
// value returned by the InferSchema methods of Importer, AvroImporter and
// ParquetImporter, so it is not specific to one file format.
type FileStats struct {
	// RecordCount is serialized under the JSON key "record_count";
	// presumably the number of records (rows) in the file — confirm.
	RecordCount int64 `json:"record_count"`
	// FileSize is serialized under the JSON key "file_size".
	// NOTE(review): presumably the on-disk size in bytes — confirm the unit.
	FileSize    int64 `json:"file_size"`
	// ColumnCount is serialized under the JSON key "column_count";
	// presumably the number of top-level columns in the file — confirm.
	ColumnCount int   `json:"column_count"`
}

FileStats contains statistics about an input file (Parquet or Avro), as returned by InferSchema

type ImportRequest

// ImportRequest bundles the parameters passed to ImportTable on Importer,
// AvroImporter and ParquetImporter.
type ImportRequest struct {
	// ParquetFile names the input file.
	// NOTE(review): ImportRequest is also the argument of
	// AvroImporter.ImportTable and has no other file field, so despite its
	// name this presumably carries the source path for any supported format —
	// confirm.
	ParquetFile    string
	// TableIdent is presumably the identifier of the destination table —
	// confirm.
	TableIdent     table.Identifier
	// NamespaceIdent is presumably the identifier of the namespace that holds
	// the destination table — confirm how it relates to TableIdent.
	NamespaceIdent table.Identifier
	// Schema is presumably the schema to use for the table, e.g. one obtained
	// from InferSchema. NOTE(review): nil handling is not visible here.
	Schema         *Schema
	// Overwrite presumably requests replacing an existing table rather than
	// failing — confirm.
	Overwrite      bool
	// PartitionBy presumably lists the column names to partition the table
	// by; empty or nil for an unpartitioned table — confirm.
	PartitionBy    []string
}

ImportRequest contains all parameters for importing a table

type ImportResult

// ImportResult reports the outcome of an ImportTable call.
type ImportResult struct {
	// TableIdent is presumably the identifier of the table that was written —
	// confirm.
	TableIdent    table.Identifier
	// RecordCount is presumably the number of records imported — confirm.
	RecordCount   int64
	// DataSize is presumably the size of the imported data.
	// NOTE(review): the unit (bytes?) is not visible here — confirm.
	DataSize      int64
	// TableLocation is presumably the storage location of the table, in the
	// same form GetTableLocation returns — confirm.
	TableLocation string
}

ImportResult contains the results of a table import

type Importer added in v0.5.0

// Importer is the format-independent contract for file importers. The
// AvroImporter and ParquetImporter pointer types both declare methods with
// these exact signatures.
type Importer interface {
	// InferSchema reads the file at filePath and returns its inferred Schema
	// together with FileStats for that file, or an error.
	InferSchema(filePath string) (*Schema, *FileStats, error)

	// GetTableLocation returns the location where the data for tableIdent
	// would be stored.
	GetTableLocation(tableIdent table.Identifier) string

	// ImportTable imports the file described by req into an Iceberg table and
	// returns an ImportResult, or an error.
	ImportTable(ctx context.Context, req ImportRequest) (*ImportResult, error)

	// Close closes the importer and releases the resources it holds.
	Close() error
}

Importer defines the interface for file importers

type ImporterFactory added in v0.5.0

type ImporterFactory struct {
	// contains filtered or unexported fields
}

ImporterFactory creates importers based on file type

func NewImporterFactory added in v0.5.0

func NewImporterFactory(cfg *config.Config) *ImporterFactory

NewImporterFactory creates a new importer factory

func (*ImporterFactory) CreateImporter added in v0.5.0

func (f *ImporterFactory) CreateImporter(filePath string) (Importer, ImporterType, error)

CreateImporter creates an importer based on the file extension

func (*ImporterFactory) CreateImporterByType added in v0.5.0

func (f *ImporterFactory) CreateImporterByType(importerType ImporterType) (Importer, error)

CreateImporterByType creates an importer for a specific type

func (*ImporterFactory) DetectFileType added in v0.5.0

func (f *ImporterFactory) DetectFileType(filePath string) (ImporterType, error)

DetectFileType detects the file type based on file extension

func (*ImporterFactory) GetSupportedFormats added in v0.5.0

func (f *ImporterFactory) GetSupportedFormats() []string

GetSupportedFormats returns a list of supported file formats

type ImporterType added in v0.5.0

// ImporterType names a kind of importer. Its underlying type is string; the
// values declared in this package are ImporterTypeParquet and ImporterTypeAvro.
type ImporterType string

ImporterType represents the type of importer

// ImporterType values declared by this package.
const (
	// ImporterTypeParquet is the ImporterType with string value "parquet".
	ImporterTypeParquet ImporterType = "parquet"
	// ImporterTypeAvro is the ImporterType with string value "avro".
	ImporterTypeAvro    ImporterType = "avro"
)

type ParquetImporter

type ParquetImporter struct {
	// contains filtered or unexported fields
}

ParquetImporter handles importing Parquet files into Iceberg tables

func NewParquetImporter

func NewParquetImporter(cfg *config.Config) (*ParquetImporter, error)

NewParquetImporter creates a new Parquet importer

func (*ParquetImporter) Close

func (p *ParquetImporter) Close() error

Close closes the importer and releases resources

func (*ParquetImporter) GetTableLocation

func (p *ParquetImporter) GetTableLocation(tableIdent table.Identifier) string

GetTableLocation returns the location where table data would be stored

func (*ParquetImporter) ImportTable

func (p *ParquetImporter) ImportTable(ctx context.Context, req ImportRequest) (*ImportResult, error)

ImportTable imports a Parquet file into an Iceberg table

func (*ParquetImporter) InferSchema

func (p *ParquetImporter) InferSchema(parquetFile string) (*Schema, *FileStats, error)

InferSchema reads a Parquet file and infers the schema

type Schema

// Schema is a simplified table schema: a list of Field values. It is the
// first value returned by InferSchema and can be supplied to ImportTable via
// ImportRequest.Schema.
type Schema struct {
	// Fields holds the schema's columns and is serialized under the JSON key
	// "fields". NOTE(review): presumably in file/column order — confirm.
	Fields []Field `json:"fields"`
}

Schema represents a simplified table schema

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL