internal

package

v0.5.0 Latest Latest Go to latest Published: Feb 27, 2026 License: Apache-2.0 Imports: 34 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/apache/iceberg-go

Links

Open Source Insights

Documentation ¶

Index ¶

Constants
func BigEndianToDecimal(buf []byte) (decimal.Decimal128, error)
func MakeSequencedChan[T any](bufferSize uint, source <-chan T, comesAfter, isNext func(a, b *T) bool, ...) <-chan T
func MapExec[T, S any](nWorkers int, slice iter.Seq[T], fn func(T) (S, error)) iter.Seq2[S, error]
func PartitionRecordValue(field iceberg.PartitionField, val iceberg.Literal, schema *iceberg.Schema) (iceberg.Optional[iceberg.Literal], error)
func TruncateUpperBoundBinary(val []byte, trunc int) []byte
func TruncateUpperBoundText(s string, trunc int) string
type DataFileStatistics
- func (d *DataFileStatistics) PartitionValue(field iceberg.PartitionField, sc *iceberg.Schema) any
- func (d *DataFileStatistics) ToDataFile(schema *iceberg.Schema, spec iceberg.PartitionSpec, path string, ...) iceberg.DataFile
type Enumerated
type FileFormat
- func FormatFromFileName(fileName string) FileFormat
- func GetFileFormat(format iceberg.FileFormat) FileFormat
type FileReader
type FileSource
- func GetFile(ctx context.Context, fs iceio.IO, dataFile iceberg.DataFile, isPosDeletes bool) (FileSource, error)
type Metadata
type MetricModeType
type MetricsMode
- func MatchMetricsMode(mode string) (MetricsMode, error)
type ParquetFileSource
- func (pfs *ParquetFileSource) GetReader(ctx context.Context) (FileReader, error)
type StatisticsCollector
type StatsAgg
type TypedStats
type WriteFileInfo

Constants ¶

View Source

const (
	ParquetRowGroupSizeBytesKey              = "write.parquet.row-group-size-bytes"
	ParquetRowGroupSizeBytesDefault          = 128 * 1024 * 1024 // 128 MB
	ParquetRowGroupLimitKey                  = "write.parquet.row-group-limit"
	ParquetRowGroupLimitDefault              = 1048576
	ParquetPageSizeBytesKey                  = "write.parquet.page-size-bytes"
	ParquetPageSizeBytesDefault              = 1024 * 1024 // 1 MB
	ParquetPageRowLimitKey                   = "write.parquet.page-row-limit"
	ParquetPageRowLimitDefault               = 20000
	ParquetDictSizeBytesKey                  = "write.parquet.dict-size-bytes"
	ParquetDictSizeBytesDefault              = 2 * 1024 * 1024 // 2 MB
	ParquetCompressionKey                    = "write.parquet.compression-codec"
	ParquetCompressionDefault                = "zstd"
	ParquetCompressionLevelKey               = "write.parquet.compression-level"
	ParquetCompressionLevelDefault           = -1
	ParquetBloomFilterMaxBytesKey            = "write.parquet.bloom-filter-max-bytes"
	ParquetBloomFilterMaxBytesDefault        = 1024 * 1024
	ParquetBloomFilterColumnEnabledKeyPrefix = "write.parquet.bloom-filter-enabled.column"
)

Variables ¶

This section is empty.

Functions ¶

func BigEndianToDecimal ¶ added in v0.2.0

func BigEndianToDecimal(buf []byte) (decimal.Decimal128, error)

func MakeSequencedChan ¶

func MakeSequencedChan[T any](bufferSize uint, source <-chan T, comesAfter, isNext func(a, b *T) bool, initial T) <-chan T

MakeSequencedChan creates a channel that outputs values in a given order based on the comesAfter and isNext functions. The values are read in from the provided source and then re-ordered before being sent to the output.

func MapExec ¶ added in v0.3.0

func MapExec[T, S any](nWorkers int, slice iter.Seq[T], fn func(T) (S, error)) iter.Seq2[S, error]

func PartitionRecordValue ¶ added in v0.3.0

func PartitionRecordValue(field iceberg.PartitionField, val iceberg.Literal, schema *iceberg.Schema) (iceberg.Optional[iceberg.Literal], error)

func TruncateUpperBoundBinary ¶ added in v0.3.0

func TruncateUpperBoundBinary(val []byte, trunc int) []byte

func TruncateUpperBoundText ¶ added in v0.3.0

func TruncateUpperBoundText(s string, trunc int) string

Types ¶

type DataFileStatistics ¶ added in v0.3.0

type DataFileStatistics struct {
	RecordCount     int64
	ColSizes        map[int]int64
	ValueCounts     map[int]int64
	NullValueCounts map[int]int64
	NanValueCounts  map[int]int64
	ColAggs         map[int]StatsAgg
	SplitOffsets    []int64
}

func (*DataFileStatistics) PartitionValue ¶ added in v0.3.0

func (d *DataFileStatistics) PartitionValue(field iceberg.PartitionField, sc *iceberg.Schema) any

func (*DataFileStatistics) ToDataFile ¶ added in v0.3.0

func (d *DataFileStatistics) ToDataFile(schema *iceberg.Schema, spec iceberg.PartitionSpec, path string, format iceberg.FileFormat, content iceberg.ManifestEntryContent, filesize int64, partitionValues map[int]any) iceberg.DataFile

type Enumerated ¶

type Enumerated[T any] struct {
	Value T
	Index int
	Last  bool
}

Enumerated is a quick way to represent a sequenced value that can be processed in parallel and then needs to be reordered.

type FileFormat ¶ added in v0.3.0

type FileFormat interface {
	Open(context.Context, iceio.IO, string) (FileReader, error)
	PathToIDMapping(*iceberg.Schema) (map[string]int, error)
	DataFileStatsFromMeta(rdr Metadata, statsCols map[int]StatisticsCollector, colMapping map[string]int) *DataFileStatistics
	GetWriteProperties(iceberg.Properties) any
	WriteDataFile(ctx context.Context, fs iceio.WriteFileIO, partitionValues map[int]any, info WriteFileInfo, batches []arrow.RecordBatch) (iceberg.DataFile, error)
}

func FormatFromFileName ¶ added in v0.3.0

func FormatFromFileName(fileName string) FileFormat

func GetFileFormat ¶ added in v0.3.0

func GetFileFormat(format iceberg.FileFormat) FileFormat

type FileReader ¶

type FileReader interface {
	io.Closer

	Metadata() Metadata
	SourceFileSize() int64
	Schema() (*arrow.Schema, error)
	// PrunedSchema takes in the list of projected field IDs and returns the arrow schema
	// that represents the underlying file schema with only the projected fields. It also
	// returns the indexes of the projected columns to allow reading *only* the needed
	// columns.
	PrunedSchema(projectedIDs map[int]struct{}, mapping iceberg.NameMapping) (*arrow.Schema, []int, error)
	// GetRecords returns a record reader for only the provided columns (using nil will read
	// all of the columns of the underlying file.) The `tester` is a function that can be used,
	// if non-nil, to filter aspects of the file such as skipping row groups in a parquet file.
	GetRecords(ctx context.Context, cols []int, tester any) (array.RecordReader, error)
	// ReadTable reads the entire file and returns it as an arrow table.
	ReadTable(context.Context) (arrow.Table, error)
}

type FileSource ¶

type FileSource interface {
	GetReader(context.Context) (FileReader, error)
}

func GetFile ¶

func GetFile(ctx context.Context, fs iceio.IO, dataFile iceberg.DataFile, isPosDeletes bool) (FileSource, error)

GetFile opens the given file using the provided file system.

The FileSource interface allows abstracting away the underlying file format while providing utilties to read the file as Arrow record batches.

type Metadata ¶ added in v0.3.0

type Metadata any

type MetricModeType ¶ added in v0.3.0

type MetricModeType string

const (
	MetricModeTruncate MetricModeType = "truncate"
	MetricModeNone     MetricModeType = "none"
	MetricModeCounts   MetricModeType = "counts"
	MetricModeFull     MetricModeType = "full"
)

type MetricsMode ¶ added in v0.3.0

type MetricsMode struct {
	Typ MetricModeType
	Len int
}

func MatchMetricsMode ¶ added in v0.3.0

func MatchMetricsMode(mode string) (MetricsMode, error)

type ParquetFileSource ¶

type ParquetFileSource struct {
	// contains filtered or unexported fields
}

func (*ParquetFileSource) GetReader ¶

func (pfs *ParquetFileSource) GetReader(ctx context.Context) (FileReader, error)

type StatisticsCollector ¶ added in v0.3.0

type StatisticsCollector struct {
	FieldID    int
	IcebergTyp iceberg.PrimitiveType
	Mode       MetricsMode
	ColName    string
}

type StatsAgg ¶ added in v0.3.0

type StatsAgg interface {
	Min() iceberg.Literal
	Max() iceberg.Literal
	Update(stats interface{ HasMinMax() bool })
	MinAsBytes() ([]byte, error)
	MaxAsBytes() ([]byte, error)
}

type TypedStats ¶ added in v0.3.0

type TypedStats[T iceberg.LiteralType] interface {
	Min() T
	Max() T
}

type WriteFileInfo ¶ added in v0.3.0

type WriteFileInfo struct {
	FileSchema *iceberg.Schema
	Spec       iceberg.PartitionSpec
	FileName   string
	StatsCols  map[int]StatisticsCollector
	WriteProps any
	Content    iceberg.ManifestEntryContent
}

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL