Documentation ¶
Index ¶
- Constants
- Variables
- func ArrowSchemaToIceberg(sc *arrow.Schema, downcastNsTimestamp bool, nameMapping iceberg.NameMapping) (*iceberg.Schema, error)
- func ArrowSchemaToIcebergWithFreshIDs(sc *arrow.Schema, downcastNsTimestamp bool) (*iceberg.Schema, error)
- func ArrowTypeToIceberg(dt arrow.DataType, downcastNsTimestamp bool) (iceberg.Type, error)
- func NewAddPartitionSpecUpdate(spec *iceberg.PartitionSpec, initial bool) *addPartitionSpecUpdate
- func NewAddSchemaUpdate(schema *iceberg.Schema) *addSchemaUpdate
- func NewAddSnapshotUpdate(snapshot *Snapshot) *addSnapshotUpdate
- func NewAddSortOrderUpdate(sortOrder *SortOrder) *addSortOrderUpdate
- func NewAssignUUIDUpdate(uuid uuid.UUID) *assignUUIDUpdate
- func NewRemovePropertiesUpdate(removals []string) *removePropertiesUpdate
- func NewRemoveSchemasUpdate(schemaIds []int) *removeSchemasUpdate
- func NewRemoveSnapshotRefUpdate(ref string) *removeSnapshotRefUpdate
- func NewRemoveSnapshotsUpdate(ids []int64) *removeSnapshotsUpdate
- func NewRemoveSpecUpdate(specIds []int) *removeSpecUpdate
- func NewSetCurrentSchemaUpdate(id int) *setCurrentSchemaUpdate
- func NewSetDefaultSortOrderUpdate(id int) *setDefaultSortOrderUpdate
- func NewSetDefaultSpecUpdate(id int) *setDefaultSpecUpdate
- func NewSetPropertiesUpdate(updates iceberg.Properties) *setPropertiesUpdate
- func NewSetSnapshotRefUpdate(name string, snapshotID int64, refType RefType, ...) *setSnapshotRefUpdate
- func NewUpgradeFormatVersionUpdate(formatVersion int) *upgradeFormatVersionUpdate
- func NewWriterFactory(rootLocation string, args recordWritingArgs, meta *MetadataBuilder, ...) writerFactory
- func SchemaToArrowSchema(sc *iceberg.Schema, metadata map[string]string, ...) (*arrow.Schema, error)
- func ToRequestedSchema(ctx context.Context, requested, fileSchema *iceberg.Schema, ...) (arrow.RecordBatch, error)
- func TypeToArrowType(t iceberg.Type, includeFieldIDs bool, useLargeTypes bool) (arrow.DataType, error)
- func VisitArrowSchema[T any](sc *arrow.Schema, visitor ArrowSchemaVisitor[T]) (res T, err error)
- func WithMaxRefAgeMs(maxRefAgeMs int64) setSnapshotRefOption
- func WithMaxSnapshotAgeMs(maxSnapshotAgeMs int64) setSnapshotRefOption
- func WithMinSnapshotsToKeep(minSnapshotsToKeep int) setSnapshotRefOption
- type ArrowSchemaVisitor
- type BlobMetadata
- type BlobType
- type CatalogIO
- type ColumnUpdate
- type DeleteOption
- type ErrIncompatibleSchema
- type ExpireSnapshotsOpt
- type FSysF
- type FileScanTask
- type Identifier
- type IncompatibleField
- type InvalidDefault
- type LocationProvider
- type Metadata
- func NewMetadata(sc *iceberg.Schema, partitions *iceberg.PartitionSpec, sortOrder SortOrder, ...) (Metadata, error)
- func NewMetadataWithUUID(sc *iceberg.Schema, partitions *iceberg.PartitionSpec, sortOrder SortOrder, ...) (Metadata, error)
- func ParseMetadata(r io.Reader) (Metadata, error)
- func ParseMetadataBytes(b []byte) (Metadata, error)
- func ParseMetadataString(s string) (Metadata, error)
- func UpdateTableMetadata(base Metadata, updates []Update, metadataLoc string) (Metadata, error)
- type MetadataBuilder
- func (b *MetadataBuilder) AddPartitionSpec(spec *iceberg.PartitionSpec, initial bool) error
- func (b *MetadataBuilder) AddSchema(schema *iceberg.Schema) error
- func (b *MetadataBuilder) AddSnapshot(snapshot *Snapshot) error
- func (b *MetadataBuilder) AddSortOrder(sortOrder *SortOrder) error
- func (b *MetadataBuilder) AppendMetadataLog(entry MetadataLogEntry) *MetadataBuilder
- func (b *MetadataBuilder) Build() (Metadata, error)
- func (b *MetadataBuilder) CurrentSchema() *iceberg.Schema
- func (b *MetadataBuilder) CurrentSpec() (*iceberg.PartitionSpec, error)
- func (b *MetadataBuilder) GetSchemaByID(id int) (*iceberg.Schema, error)
- func (b *MetadataBuilder) GetSortOrderByID(id int) (*SortOrder, error)
- func (b *MetadataBuilder) GetSpecByID(id int) (*iceberg.PartitionSpec, error)
- func (b *MetadataBuilder) HasChanges() bool
- func (b *MetadataBuilder) LastUpdatedMS() int64
- func (b *MetadataBuilder) NameMapping() iceberg.NameMapping
- func (b *MetadataBuilder) NextRowID() int64
- func (b *MetadataBuilder) RemovePartitionSpecs(ints []int) error
- func (b *MetadataBuilder) RemoveProperties(keys []string) error
- func (b *MetadataBuilder) RemoveSchemas(ints []int) error
- func (b *MetadataBuilder) RemoveSnapshotRef(name string) error
- func (b *MetadataBuilder) RemoveSnapshots(snapshotIds []int64) error
- func (b *MetadataBuilder) SetCurrentSchemaID(currentSchemaID int) error
- func (b *MetadataBuilder) SetDefaultSortOrderID(defaultSortOrderID int) error
- func (b *MetadataBuilder) SetDefaultSpecID(defaultSpecID int) error
- func (b *MetadataBuilder) SetFormatVersion(formatVersion int) error
- func (b *MetadataBuilder) SetLastUpdatedMS() *MetadataBuilder
- func (b *MetadataBuilder) SetLoc(loc string) error
- func (b *MetadataBuilder) SetProperties(props iceberg.Properties) error
- func (b *MetadataBuilder) SetSnapshotRef(name string, snapshotID int64, refType RefType, ...) error
- func (b *MetadataBuilder) SetUUID(uuid uuid.UUID) error
- func (b *MetadataBuilder) SnapshotByID(id int64) (*Snapshot, error)
- func (b *MetadataBuilder) TrimMetadataLogs(maxEntries int) *MetadataBuilder
- type MetadataLogEntry
- type MoveOp
- type NullOrder
- type Operation
- type OrphanCleanupOption
- func WithDeleteFunc(deleteFunc func(string) error) OrphanCleanupOption
- func WithDryRun(enabled bool) OrphanCleanupOption
- func WithEqualAuthorities(authorities map[string]string) OrphanCleanupOption
- func WithEqualSchemes(schemes map[string]string) OrphanCleanupOption
- func WithFilesOlderThan(duration time.Duration) OrphanCleanupOption
- func WithLocation(location string) OrphanCleanupOption
- func WithMaxConcurrency(maxWorkers int) OrphanCleanupOption
- func WithPrefixMismatchMode(mode PrefixMismatchMode) OrphanCleanupOption
- type OrphanCleanupResult
- type OverwriteOption
- type PartitionStatisticsFile
- type PrefixMismatchMode
- type ReassignedIds
- type RefType
- type Requirement
- func AssertCreate() Requirement
- func AssertCurrentSchemaID(id int) Requirement
- func AssertDefaultSortOrderID(id int) Requirement
- func AssertDefaultSpecID(id int) Requirement
- func AssertLastAssignedFieldID(id int) Requirement
- func AssertLastAssignedPartitionID(id int) Requirement
- func AssertRefSnapshotID(ref string, id *int64) Requirement
- func AssertTableUUID(uuid uuid.UUID) Requirement
- func ParseRequirement(r io.Reader) (Requirement, error)
- func ParseRequirementBytes(b []byte) (Requirement, error)
- func ParseRequirementString(s string) (Requirement, error)
- type Requirements
- type RollingDataWriter
- type Scan
- func (scan *Scan) PlanFiles(ctx context.Context) ([]FileScanTask, error)
- func (scan *Scan) Projection() (*iceberg.Schema, error)
- func (scan *Scan) Snapshot() *Snapshot
- func (scan *Scan) ToArrowRecords(ctx context.Context) (*arrow.Schema, iter.Seq2[arrow.RecordBatch, error], error)
- func (scan *Scan) ToArrowTable(ctx context.Context) (arrow.Table, error)
- func (scan *Scan) UseRef(name string) (*Scan, error)
- func (scan *Scan) UseRowLimit(n int64) *Scan
- type ScanOption
- func WitMaxConcurrency(n int) ScanOption
- func WithCaseSensitive(b bool) ScanOption
- func WithLimit(n int64) ScanOption
- func WithOptions(opts iceberg.Properties) ScanOption
- func WithRowFilter(e iceberg.BooleanExpression) ScanOption
- func WithSelectedFields(fields ...string) ScanOption
- func WithSnapshotAsOf(timeStampMs int64) ScanOption
- func WithSnapshotID(n int64) ScanOption
- type SequenceNumberValidator
- type Snapshot
- type SnapshotLogEntry
- type SnapshotRef
- type SnapshotSummaryCollector
- type SortDirection
- type SortField
- type SortOrder
- func (s *SortOrder) CheckCompatibility(schema *iceberg.Schema) error
- func (s SortOrder) Equals(rhs SortOrder) bool
- func (s SortOrder) Fields() iter.Seq[SortField]
- func (s SortOrder) IsUnsorted() bool
- func (s SortOrder) Len() int
- func (s SortOrder) MarshalJSON() ([]byte, error)
- func (s SortOrder) OrderID() int
- func (s SortOrder) String() string
- func (s *SortOrder) UnmarshalJSON(b []byte) error
- type StagedTable
- type StatisticsFile
- type Summary
- type Table
- func (t Table) AllManifests(ctx context.Context) iter.Seq2[iceberg.ManifestFile, error]
- func (t Table) Append(ctx context.Context, rdr array.RecordReader, snapshotProps iceberg.Properties) (*Table, error)
- func (t Table) AppendTable(ctx context.Context, tbl arrow.Table, batchSize int64, ...) (*Table, error)
- func (t Table) CurrentSnapshot() *Snapshot
- func (t Table) Delete(ctx context.Context, filter iceberg.BooleanExpression, ...) (*Table, error)
- func (t Table) DeleteOrphanFiles(ctx context.Context, opts ...OrphanCleanupOption) (OrphanCleanupResult, error)
- func (t Table) Equals(other Table) bool
- func (t Table) FS(ctx context.Context) (icebergio.IO, error)
- func (t Table) Identifier() Identifier
- func (t Table) Location() string
- func (t Table) LocationProvider() (LocationProvider, error)
- func (t Table) Metadata() Metadata
- func (t Table) MetadataLocation() string
- func (t Table) NameMapping() iceberg.NameMapping
- func (t Table) NewTransaction() *Transaction
- func (t Table) Overwrite(ctx context.Context, rdr array.RecordReader, snapshotProps iceberg.Properties, ...) (*Table, error)
- func (t Table) OverwriteTable(ctx context.Context, tbl arrow.Table, batchSize int64, ...) (*Table, error)
- func (t Table) Properties() iceberg.Properties
- func (t *Table) Refresh(ctx context.Context) error
- func (t Table) Scan(opts ...ScanOption) *Scan
- func (t Table) Schema() *iceberg.Schema
- func (t Table) Schemas() map[int]*iceberg.Schema
- func (t Table) SnapshotAsOf(timestampMs int64, inclusive bool) *Snapshot
- func (t Table) SnapshotByID(id int64) *Snapshot
- func (t Table) SnapshotByName(name string) *Snapshot
- func (t Table) SortOrder() SortOrder
- func (t Table) Spec() iceberg.PartitionSpec
- type Transaction
- func (t *Transaction) AddDataFiles(ctx context.Context, dataFiles []iceberg.DataFile, ...) error
- func (t *Transaction) AddFiles(ctx context.Context, files []string, snapshotProps iceberg.Properties, ...) error
- func (t *Transaction) Append(ctx context.Context, rdr array.RecordReader, snapshotProps iceberg.Properties) error
- func (t *Transaction) AppendTable(ctx context.Context, tbl arrow.Table, batchSize int64, ...) error
- func (t *Transaction) Commit(ctx context.Context) (*Table, error)
- func (t *Transaction) Delete(ctx context.Context, filter iceberg.BooleanExpression, ...) (err error)
- func (t *Transaction) ExpireSnapshots(opts ...ExpireSnapshotsOpt) error
- func (t *Transaction) Overwrite(ctx context.Context, rdr array.RecordReader, snapshotProps iceberg.Properties, ...) error
- func (t *Transaction) OverwriteTable(ctx context.Context, tbl arrow.Table, batchSize int64, ...) error
- func (t *Transaction) ReplaceDataFiles(ctx context.Context, filesToDelete, filesToAdd []string, ...) error
- func (t *Transaction) ReplaceDataFilesWithDataFiles(ctx context.Context, filesToDelete, filesToAdd []iceberg.DataFile, ...) error
- func (t *Transaction) Scan(opts ...ScanOption) (*Scan, error)
- func (t *Transaction) SetProperties(props iceberg.Properties) error
- func (t *Transaction) StagedTable() (*StagedTable, error)
- func (t *Transaction) UpdateSchema(caseSensitive bool, allowIncompatibleChanges bool, opts ...UpdateSchemaOption) *UpdateSchema
- func (t *Transaction) UpdateSpec(caseSensitive bool) *UpdateSpec
- type UnsupportedType
- type Update
- type UpdateSchema
- func (u *UpdateSchema) AddColumn(path []string, fieldType iceberg.Type, doc string, required bool, ...) *UpdateSchema
- func (u *UpdateSchema) Apply() (*iceberg.Schema, error)
- func (u *UpdateSchema) BuildUpdates() ([]Update, []Requirement, error)
- func (u *UpdateSchema) Commit() error
- func (u *UpdateSchema) DeleteColumn(path []string) *UpdateSchema
- func (u *UpdateSchema) MoveAfter(path, relativeTo []string) *UpdateSchema
- func (u *UpdateSchema) MoveBefore(path, relativeTo []string) *UpdateSchema
- func (u *UpdateSchema) MoveColumn(op MoveOp, path, relativeTo []string) *UpdateSchema
- func (u *UpdateSchema) MoveFirst(path []string) *UpdateSchema
- func (u *UpdateSchema) RenameColumn(path []string, newName string) *UpdateSchema
- func (u *UpdateSchema) SetIdentifierField(paths [][]string) *UpdateSchema
- func (u *UpdateSchema) UpdateColumn(path []string, update ColumnUpdate) *UpdateSchema
- type UpdateSchemaOption
- type UpdateSpec
- func (us *UpdateSpec) AddField(sourceColName string, transform iceberg.Transform, partitionFieldName string) *UpdateSpec
- func (us *UpdateSpec) AddIdentity(sourceColName string) *UpdateSpec
- func (us *UpdateSpec) Apply() (iceberg.PartitionSpec, error)
- func (us *UpdateSpec) BuildUpdates() ([]Update, []Requirement, error)
- func (us *UpdateSpec) Commit() error
- func (us *UpdateSpec) RemoveField(name string) *UpdateSpec
- func (us *UpdateSpec) RenameField(name string, newName string) *UpdateSpec
- type Updates
- type WriteTask
Constants ¶
const (
	ArrowFieldDocKey = "doc"

	// Arrow schemas that are generated from the Parquet library will utilize
	// this key to identify the field id of the source Parquet field.
	// We use this when converting to Iceberg to provide field IDs.
	ArrowParquetFieldIDKey = "PARQUET:field_id"
)
Constants to look for as keys in Arrow field metadata.
const (
	WriteDataPathKey                        = "write.data.path"
	WriteMetadataPathKey                    = "write.metadata.path"
	WriteObjectStorePartitionedPathsKey     = "write.object-storage.partitioned-paths"
	WriteObjectStorePartitionedPathsDefault = true
	ObjectStoreEnabledKey                   = "write.object-storage.enabled"
	ObjectStoreEnabledDefault               = false
	DefaultNameMappingKey                   = "schema.name-mapping.default"
	MetricsModeColumnConfPrefix             = "write.metadata.metrics.column"
	DefaultWriteMetricsModeKey              = "write.metadata.metrics.default"
	DefaultWriteMetricsModeDefault          = "truncate(16)"

	ParquetRowGroupSizeBytesKey              = internal.ParquetRowGroupSizeBytesKey
	ParquetRowGroupSizeBytesDefault          = internal.ParquetRowGroupSizeBytesDefault
	ParquetRowGroupLimitKey                  = internal.ParquetRowGroupLimitKey
	ParquetRowGroupLimitDefault              = internal.ParquetRowGroupLimitDefault
	ParquetPageSizeBytesKey                  = internal.ParquetPageSizeBytesKey
	ParquetPageSizeBytesDefault              = internal.ParquetPageSizeBytesDefault
	ParquetPageRowLimitKey                   = internal.ParquetPageRowLimitKey
	ParquetPageRowLimitDefault               = internal.ParquetPageRowLimitDefault
	ParquetDictSizeBytesKey                  = internal.ParquetDictSizeBytesKey
	ParquetDictSizeBytesDefault              = internal.ParquetDictSizeBytesDefault
	ParquetCompressionKey                    = internal.ParquetCompressionKey
	ParquetCompressionDefault                = internal.ParquetCompressionDefault
	ParquetCompressionLevelKey               = internal.ParquetCompressionLevelKey
	ParquetCompressionLevelDefault           = internal.ParquetCompressionLevelDefault
	ParquetBloomFilterMaxBytesKey            = internal.ParquetBloomFilterMaxBytesKey
	ParquetBloomFilterMaxBytesDefault        = internal.ParquetBloomFilterMaxBytesDefault
	ParquetBloomFilterColumnEnabledKeyPrefix = internal.ParquetBloomFilterColumnEnabledKeyPrefix

	ManifestMergeEnabledKey        = "commit.manifest-merge.enabled"
	ManifestMergeEnabledDefault    = false
	ManifestTargetSizeBytesKey     = "commit.manifest.target-size-bytes"
	ManifestTargetSizeBytesDefault = 8 * 1024 * 1024 // 8 MB
	ManifestMinMergeCountKey       = "commit.manifest.min-count-to-merge"
	ManifestMinMergeCountDefault   = 100

	WritePartitionSummaryLimitKey     = "write.summary.partition-limit"
	WritePartitionSummaryLimitDefault = 0
	WriteDeleteModeKey                = "write.delete.mode"
	WriteDeleteModeDefault            = WriteModeCopyOnWrite

	MetadataDeleteAfterCommitEnabledKey     = "write.metadata.delete-after-commit.enabled"
	MetadataDeleteAfterCommitEnabledDefault = false
	MetadataPreviousVersionsMaxKey          = "write.metadata.previous-versions-max"
	MetadataPreviousVersionsMaxDefault      = 100
	MetadataCompressionKey                  = "write.metadata.compression-codec"
	MetadataCompressionDefault              = "none"

	WriteTargetFileSizeBytesKey     = "write.target-file-size-bytes"
	WriteTargetFileSizeBytesDefault = 512 * 1024 * 1024 // 512 MB

	MinSnapshotsToKeepKey     = "min-snapshots-to-keep"
	MinSnapshotsToKeepDefault = math.MaxInt
	MaxSnapshotAgeMsKey       = "max-snapshot-age-ms"
	MaxSnapshotAgeMsDefault   = math.MaxInt
	MaxRefAgeMsKey            = "max-ref-age-ms"
	MaxRefAgeMsDefault        = math.MaxInt
)
const (
	PropertyFormatVersion            = "format-version"
	PropertyUuid                     = "uuid"
	PropertySnapshotCount            = "snapshot-count"
	PropertyCurrentSnapshotId        = "current-snapshot-id"
	PropertyCurrentSnapshotSummary   = "current-snapshot-summary"
	PropertyCurrentSnapshotTimestamp = "current-snapshot-timestamp"
	PropertyCurrentSchema            = "current-schema"
	PropertyDefaultPartitionSpec     = "default-partition-spec"
	PropertyDefaultSortOrder         = "default-sort-order"
)
Reserved properties
const (
	MetadataCompressionCodecNone = "none"
	MetadataCompressionCodecGzip = "gzip"
)
Metadata compression codecs
const (
	WriteModeCopyOnWrite = "copy-on-write"
	WriteModeMergeOnRead = "merge-on-read"
)
Write modes
const (
	InitialSortOrderID  = 1
	UnsortedSortOrderID = 0
)
const (
	UpdateAddSpec              = "add-spec"
	UpdateAddSchema            = "add-schema"
	UpdateAddSnapshot          = "add-snapshot"
	UpdateAddSortOrder         = "add-sort-order"
	UpdateAssignUUID           = "assign-uuid"
	UpdateRemoveProperties     = "remove-properties"
	UpdateRemoveSchemas        = "remove-schemas"
	UpdateRemoveSnapshots      = "remove-snapshots"
	UpdateRemoveSnapshotRef    = "remove-snapshot-ref"
	UpdateRemoveSpec           = "remove-partition-specs"
	UpdateSetCurrentSchema     = "set-current-schema"
	UpdateSetDefaultSortOrder  = "set-default-sort-order"
	UpdateSetDefaultSpec       = "set-default-spec"
	UpdateSetLocation          = "set-location"
	UpdateSetProperties        = "set-properties"
	UpdateSetSnapshotRef       = "set-snapshot-ref"
	UpdateUpgradeFormatVersion = "upgrade-format-version"
)
These are the various update actions defined in the iceberg spec
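As a minimal sketch of how such updates are applied (assuming base is an existing Metadata and metadataLoc is the location of the current metadata file; the property names are illustrative), a list of updates can be applied functionally via UpdateTableMetadata:

updates := []table.Update{
	table.NewSetPropertiesUpdate(iceberg.Properties{"owner": "data-eng"}),
	table.NewRemovePropertiesUpdate([]string{"deprecated-key"}),
}

// UpdateTableMetadata returns a new Metadata with the updates applied,
// leaving the base metadata untouched.
newMeta, err := table.UpdateTableMetadata(base, updates, metadataLoc)
if err != nil {
	log.Fatal(err)
}
_ = newMeta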
const DefaultFormatVersion = 2
const MainBranch = "main"
const ScanNoLimit = -1
const (
ScanOptionArrowUseLargeTypes = "arrow.use_large_types"
)
const TableRootID = -1
Variables ¶
var (
	ErrInvalidMetadataFormatVersion = errors.New("invalid or missing format-version in table metadata")
	ErrInvalidMetadata              = errors.New("invalid metadata")
	ErrPartitionSpecNotFound        = errors.New("partition spec not found")
)
var (
	ErrInvalidOperation  = errors.New("invalid operation value")
	ErrMissingOperation  = errors.New("missing operation key")
	ErrInvalidRowLineage = errors.New("invalid row lineage")
)
var (
	ErrInvalidSortOrderID   = errors.New("invalid sort order ID")
	ErrInvalidTransform     = errors.New("invalid transform, must be a valid transform string or a transform object")
	ErrInvalidSortDirection = errors.New("invalid sort direction, must be 'asc' or 'desc'")
	ErrInvalidNullOrder     = errors.New("invalid null order, must be 'nulls-first' or 'nulls-last'")
)
var ErrInvalidRefType = errors.New("invalid snapshot ref type, should be 'branch' or 'tag'")
var ErrInvalidRequirement = errors.New("invalid requirement")
var PositionalDeleteArrowSchema, _ = SchemaToArrowSchema(iceberg.PositionalDeleteSchema, nil, true, false)
var ReservedProperties = [9]string{
	PropertyFormatVersion,
	PropertyUuid,
	PropertySnapshotCount,
	PropertyCurrentSnapshotId,
	PropertyCurrentSnapshotSummary,
	PropertyCurrentSnapshotTimestamp,
	PropertyCurrentSchema,
	PropertyDefaultPartitionSpec,
	PropertyDefaultSortOrder,
}
var UnsortedSortOrder = SortOrder{/* contains filtered or unexported fields */}
A default Sort Order indicating no sort order at all
Functions ¶
func ArrowSchemaToIceberg ¶
func ArrowTypeToIceberg ¶
func NewAddPartitionSpecUpdate ¶
func NewAddPartitionSpecUpdate(spec *iceberg.PartitionSpec, initial bool) *addPartitionSpecUpdate
NewAddPartitionSpecUpdate creates a new update that adds the given partition spec to the table metadata. If the initial flag is set to true, the spec is considered the initial spec of the table, and all other previously added specs in the metadata builder are removed.
func NewAddSchemaUpdate ¶
NewAddSchemaUpdate creates a new update that adds the given schema and updates the lastColumnID based on the schema.
func NewAddSnapshotUpdate ¶
func NewAddSnapshotUpdate(snapshot *Snapshot) *addSnapshotUpdate
NewAddSnapshotUpdate creates a new update that adds the given snapshot to the table metadata.
func NewAddSortOrderUpdate ¶
func NewAddSortOrderUpdate(sortOrder *SortOrder) *addSortOrderUpdate
NewAddSortOrderUpdate creates a new update that adds the given sort order to the table metadata.
func NewAssignUUIDUpdate ¶
NewAssignUUIDUpdate creates a new update to assign a UUID to the table metadata.
func NewRemovePropertiesUpdate ¶
func NewRemovePropertiesUpdate(removals []string) *removePropertiesUpdate
NewRemovePropertiesUpdate creates a new update that removes properties from the table metadata. The properties are identified by their names, and if a property with the given name does not exist, it is ignored.
func NewRemoveSchemasUpdate ¶
func NewRemoveSchemasUpdate(schemaIds []int) *removeSchemasUpdate
NewRemoveSchemasUpdate creates a new Update that removes a list of schemas from the table metadata.
func NewRemoveSnapshotRefUpdate ¶
func NewRemoveSnapshotRefUpdate(ref string) *removeSnapshotRefUpdate
NewRemoveSnapshotRefUpdate creates a new update that removes a snapshot reference from the table metadata.
func NewRemoveSnapshotsUpdate ¶
func NewRemoveSnapshotsUpdate(ids []int64) *removeSnapshotsUpdate
NewRemoveSnapshotsUpdate creates a new update that removes all snapshots from the table metadata with the given snapshot IDs.
func NewRemoveSpecUpdate ¶
func NewRemoveSpecUpdate(specIds []int) *removeSpecUpdate
NewRemoveSpecUpdate creates a new Update that removes a list of partition specs from the table metadata.
func NewSetCurrentSchemaUpdate ¶
func NewSetCurrentSchemaUpdate(id int) *setCurrentSchemaUpdate
NewSetCurrentSchemaUpdate creates a new update that sets the current schema of the table metadata to the given schema ID.
func NewSetDefaultSortOrderUpdate ¶
func NewSetDefaultSortOrderUpdate(id int) *setDefaultSortOrderUpdate
NewSetDefaultSortOrderUpdate creates a new update that sets the default sort order of the table metadata to the given sort order ID.
func NewSetDefaultSpecUpdate ¶
func NewSetDefaultSpecUpdate(id int) *setDefaultSpecUpdate
NewSetDefaultSpecUpdate creates a new update that sets the default partition spec of the table metadata to the given spec ID.
func NewSetPropertiesUpdate ¶
func NewSetPropertiesUpdate(updates iceberg.Properties) *setPropertiesUpdate
NewSetPropertiesUpdate creates a new update that sets the given properties in the table metadata.
func NewSetSnapshotRefUpdate ¶
func NewSetSnapshotRefUpdate(
	name string,
	snapshotID int64,
	refType RefType,
	maxRefAgeMs, maxSnapshotAgeMs int64,
	minSnapshotsToKeep int,
) *setSnapshotRefUpdate
NewSetSnapshotRefUpdate creates a new update that sets the given snapshot reference as the current snapshot of the table metadata. MaxRefAgeMs, MaxSnapshotAgeMs, and MinSnapshotsToKeep are optional, and any non-positive values are ignored.
func NewUpgradeFormatVersionUpdate ¶
func NewUpgradeFormatVersionUpdate(formatVersion int) *upgradeFormatVersionUpdate
NewUpgradeFormatVersionUpdate creates a new update that upgrades the format version of the table metadata to the given formatVersion.
func NewWriterFactory ¶
func NewWriterFactory(rootLocation string, args recordWritingArgs, meta *MetadataBuilder, taskSchema *iceberg.Schema, targetFileSize int64) writerFactory
NewWriterFactory creates a new writerFactory with the specified configuration for managing rolling data writers across partitions.
func SchemaToArrowSchema ¶
func SchemaToArrowSchema(sc *iceberg.Schema, metadata map[string]string, includeFieldIDs, useLargeTypes bool) (*arrow.Schema, error)
SchemaToArrowSchema converts an Iceberg schema to an Arrow schema. If the metadata parameter is non-nil, it will be included as the top-level metadata in the schema. If includeFieldIDs is true, then each field of the schema will contain a metadata key PARQUET:field_id set to the field id from the iceberg schema.
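A minimal sketch of the conversion (the two-column schema is illustrative; iceberg.NewSchema and iceberg.PrimitiveTypes come from the parent iceberg package):

sc := iceberg.NewSchema(0,
	iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int64, Required: true},
	iceberg.NestedField{ID: 2, Name: "name", Type: iceberg.PrimitiveTypes.String},
)

arrowSchema, err := table.SchemaToArrowSchema(sc, nil, true /* includeFieldIDs */, false /* useLargeTypes */)
if err != nil {
	log.Fatal(err)
}
fmt.Println(arrowSchema) // each field carries PARQUET:field_id metadata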
func ToRequestedSchema ¶
func ToRequestedSchema(ctx context.Context, requested, fileSchema *iceberg.Schema, batch arrow.RecordBatch, downcastTimestamp, includeFieldIDs, useLargeTypes bool) (arrow.RecordBatch, error)
ToRequestedSchema will construct a new record batch matching the requested iceberg schema, casting columns as necessary.
func TypeToArrowType ¶
func TypeToArrowType(t iceberg.Type, includeFieldIDs bool, useLargeTypes bool) (arrow.DataType, error)
TypeToArrowType converts a given iceberg type, into the equivalent Arrow data type. For dealing with nested fields (List, Struct, Map) if includeFieldIDs is true, then the child fields will contain a metadata key PARQUET:field_id set to the field id.
func VisitArrowSchema ¶
func VisitArrowSchema[T any](sc *arrow.Schema, visitor ArrowSchemaVisitor[T]) (res T, err error)
func WithMaxRefAgeMs ¶
func WithMaxRefAgeMs(maxRefAgeMs int64) setSnapshotRefOption
func WithMaxSnapshotAgeMs ¶
func WithMaxSnapshotAgeMs(maxSnapshotAgeMs int64) setSnapshotRefOption
func WithMinSnapshotsToKeep ¶
func WithMinSnapshotsToKeep(minSnapshotsToKeep int) setSnapshotRefOption
Types ¶
type ArrowSchemaVisitor ¶
type ArrowSchemaVisitor[T any] interface {
	Schema(*arrow.Schema, T) T
	Struct(*arrow.StructType, []T) T
	Field(arrow.Field, T) T
	List(arrow.ListLikeType, T) T
	Map(mt *arrow.MapType, keyResult T, valueResult T) T
	Primitive(arrow.DataType) T
}
ArrowSchemaVisitor is an interface that can be implemented and passed to VisitArrowSchema to iterate over an Arrow schema.
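As an illustrative sketch (not part of the package), a visitor with T = int that counts the primitive leaf fields of a schema; each callback folds its children's results:

// leafCounter counts primitive leaves in an Arrow schema.
type leafCounter struct{}

func (leafCounter) Schema(_ *arrow.Schema, structResult int) int { return structResult }
func (leafCounter) Struct(_ *arrow.StructType, fieldResults []int) int {
	total := 0
	for _, n := range fieldResults {
		total += n
	}
	return total
}
func (leafCounter) Field(_ arrow.Field, result int) int           { return result }
func (leafCounter) List(_ arrow.ListLikeType, elemResult int) int { return elemResult }
func (leafCounter) Map(_ *arrow.MapType, keyResult, valueResult int) int {
	return keyResult + valueResult
}
func (leafCounter) Primitive(arrow.DataType) int { return 1 }

// usage: n, err := table.VisitArrowSchema(arrowSchema, leafCounter{})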
type BlobMetadata ¶
type BlobMetadata struct {
Type BlobType `json:"type"`
SnapshotID int64 `json:"snapshot-id"`
SequenceNumber int64 `json:"sequence-number"`
Fields []int32 `json:"fields"`
Properties map[string]string `json:"properties"`
}
BlobMetadata is the metadata of a statistics or indices blob.
type BlobType ¶
type BlobType string
BlobType is the type of blob in a Puffin file
func (*BlobType) UnmarshalJSON ¶
type CatalogIO ¶
type CatalogIO interface {
LoadTable(context.Context, Identifier) (*Table, error)
CommitTable(context.Context, Identifier, []Requirement, []Update) (Metadata, string, error)
}
type ColumnUpdate ¶
type DeleteOption ¶
type DeleteOption func(deleteOp *deleteOperation)
func WithDeleteCaseInsensitive ¶
func WithDeleteCaseInsensitive() DeleteOption
WithDeleteCaseInsensitive changes the binding of the filter to be case insensitive (the default is case sensitive). Note that the sensitivity applies only to the field name and not to the evaluation of the literals on string fields.
func WithDeleteConcurrency ¶
func WithDeleteConcurrency(concurrency int) DeleteOption
WithDeleteConcurrency overrides the default concurrency for delete operations. Default: runtime.GOMAXPROCS(0)
type ErrIncompatibleSchema ¶
type ErrIncompatibleSchema struct {
// contains filtered or unexported fields
}
func (ErrIncompatibleSchema) Error ¶
func (e ErrIncompatibleSchema) Error() string
func (ErrIncompatibleSchema) Unwrap ¶
func (e ErrIncompatibleSchema) Unwrap() error
type ExpireSnapshotsOpt ¶
type ExpireSnapshotsOpt func(*expireSnapshotsCfg)
func WithOlderThan ¶
func WithOlderThan(t time.Duration) ExpireSnapshotsOpt
func WithPostCommit ¶
func WithPostCommit(postCommit bool) ExpireSnapshotsOpt
WithPostCommit controls whether orphaned files (manifests, manifest lists, data files) are deleted immediately after expiring snapshots. Defaults to true. Set to false to defer file deletion to a separate maintenance job, avoiding conflicts with in-flight queries that may still reference those files.
func WithRetainLast ¶
func WithRetainLast(n int) ExpireSnapshotsOpt
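A sketch of a retention pass combining these options (assuming tbl is a loaded *table.Table and ctx is a context.Context):

txn := tbl.NewTransaction()
if err := txn.ExpireSnapshots(
	table.WithOlderThan(7*24*time.Hour), // expire snapshots older than a week
	table.WithRetainLast(5),             // but always keep the 5 most recent
	table.WithPostCommit(false),         // defer file deletion to a maintenance job
); err != nil {
	log.Fatal(err)
}
updated, err := txn.Commit(ctx)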
type FileScanTask ¶
type Identifier ¶
type Identifier = []string
type IncompatibleField ¶
type IncompatibleField struct {
Field iceberg.NestedField
ColName string
UnsupportedType *UnsupportedType
InvalidDefault *InvalidDefault
}
type InvalidDefault ¶
type LocationProvider ¶
type LocationProvider interface {
NewDataLocation(dataFileName string) string
NewTableMetadataFileLocation(newVersion int) (string, error)
NewMetadataLocation(metadataFileName string) string
}
func LoadLocationProvider ¶
func LoadLocationProvider(tableLocation string, tableProps iceberg.Properties) (LocationProvider, error)
type Metadata ¶
type Metadata interface {
// Version indicates the version of this metadata, 1 for V1, 2 for V2, etc.
Version() int
// TableUUID returns a UUID that identifies the table, generated when the
// table is created. Implementations must throw an exception if a table's
// UUID does not match the expected UUID after refreshing metadata.
TableUUID() uuid.UUID
// Location is the table's base location. This is used by writers to determine
// where to store data files, manifest files, and table metadata files.
Location() string
// LastUpdatedMillis is the timestamp in milliseconds from the unix epoch when
// the table was last updated. Each table metadata file should update this
// field just before writing.
LastUpdatedMillis() int64
// LastColumnID returns the highest assigned column ID for the table.
// This is used to ensure fields are always assigned an unused ID when
// evolving schemas.
LastColumnID() int
// Schemas returns the list of schemas, stored as objects with their
// schema-id.
Schemas() []*iceberg.Schema
// CurrentSchema returns the table's current schema.
CurrentSchema() *iceberg.Schema
// PartitionSpecs returns the list of all partition specs in the table.
PartitionSpecs() []iceberg.PartitionSpec
// PartitionSpec returns the current partition spec that the table is using.
PartitionSpec() iceberg.PartitionSpec
// PartitionSpecByID returns the partition spec with the given ID. Returns
// nil if the ID is not found in the list of partition specs.
PartitionSpecByID(int) *iceberg.PartitionSpec
// DefaultPartitionSpec is the ID of the current spec that writers should
// use by default.
DefaultPartitionSpec() int
// LastPartitionSpecID is the highest assigned partition field ID across
// all partition specs for the table. This is used to ensure partition
// fields are always assigned an unused ID when evolving specs.
LastPartitionSpecID() *int
// Snapshots returns the list of valid snapshots. Valid snapshots are
// snapshots for which all data files exist in the file system. A data
// file must not be deleted from the file system until the last snapshot
// in which it was listed is garbage collected.
Snapshots() []Snapshot
// SnapshotByID finds and returns a specific snapshot by its ID. Returns
// nil if the ID is not found in the list of snapshots.
SnapshotByID(int64) *Snapshot
// SnapshotByName searches the list of snapshots for a snapshot with a given
// ref name. Returns nil if there's no ref with this name for a snapshot.
SnapshotByName(name string) *Snapshot
// CurrentSnapshot returns the table's current snapshot.
CurrentSnapshot() *Snapshot
// Ref returns the snapshot ref for the main branch.
Ref() SnapshotRef
// Refs returns a list of snapshot name/reference pairs.
Refs() iter.Seq2[string, SnapshotRef]
// SnapshotLogs returns the list of snapshot logs for the table.
SnapshotLogs() iter.Seq[SnapshotLogEntry]
// SortOrder returns the table's current sort order, i.e. the one with the
// ID that matches the default-sort-order-id.
SortOrder() SortOrder
// SortOrders returns the list of sort orders in the table.
SortOrders() []SortOrder
// DefaultSortOrder returns the ID of the current sort order that writers
// should use by default.
DefaultSortOrder() int
// Properties is a string to string map of table properties. This is used
// to control settings that affect reading and writing and is not intended
// to be used for arbitrary metadata. For example, commit.retry.num-retries
// is used to control the number of commit retries.
Properties() iceberg.Properties
// PreviousFiles returns the list of metadata log entries for the table.
PreviousFiles() iter.Seq[MetadataLogEntry]
Equals(Metadata) bool
NameMapping() iceberg.NameMapping
LastSequenceNumber() int64
// NextRowID returns the next available row ID for v3 tables.
// Returns 0 for v1/v2 tables or if not set.
NextRowID() int64
// Statistics returns an optional list of table statistics.
// Table statistics files are valid Puffin files.
// StatisticsFile are informational. A reader can choose to ignore statistics information.
// StatisticsFile support is not required to read the table correctly.
// A table can contain many statistics files associated with different table snapshots.
Statistics() iter.Seq[StatisticsFile]
// PartitionStatistics returns an optional list of partition statistics files.
// Partition statistics are not required for reading or planning
// and readers may ignore them. Each table snapshot may be associated
// with at most one partition statistics file. A writer can optionally
// write the partition statistics file during each write operation,
// or it can also be computed on demand.
PartitionStatistics() iter.Seq[PartitionStatisticsFile]
}
Metadata for an iceberg table as specified in the Iceberg spec
https://iceberg.apache.org/spec/#iceberg-table-spec
func NewMetadata ¶
func NewMetadata(sc *iceberg.Schema, partitions *iceberg.PartitionSpec, sortOrder SortOrder, location string, props iceberg.Properties) (Metadata, error)
NewMetadata creates a new table metadata object from the provided schema and information, generating a fresh UUID for the new table metadata. By default, this will generate V2 table metadata, but this can be modified by adding a "format-version" property to the props map. An error will be returned if the "format-version" property exists and is not a valid version number.
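A sketch of creating V1 metadata for an unpartitioned, unsorted table (the location and property values are illustrative; iceberg.UnpartitionedSpec is the parent package's empty partition spec):

sc := iceberg.NewSchema(0,
	iceberg.NestedField{ID: 1, Name: "id", Type: iceberg.PrimitiveTypes.Int64, Required: true},
)

meta, err := table.NewMetadata(sc, iceberg.UnpartitionedSpec, table.UnsortedSortOrder,
	"s3://bucket/warehouse/db/tbl",
	iceberg.Properties{"format-version": "1"}, // omit for the V2 default
)
if err != nil {
	log.Fatal(err)
}
fmt.Println(meta.Version()) // 1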
func NewMetadataWithUUID ¶
func NewMetadataWithUUID(sc *iceberg.Schema, partitions *iceberg.PartitionSpec, sortOrder SortOrder, location string, props iceberg.Properties, tableUuid uuid.UUID) (Metadata, error)
NewMetadataWithUUID is like NewMetadata, but allows the caller to specify the UUID of the table rather than creating a new one.
func ParseMetadata ¶
ParseMetadata parses json metadata provided by the passed in reader, returning an error if one is encountered.
func ParseMetadataBytes ¶
ParseMetadataBytes is like ParseMetadataString but for a byte slice.
func ParseMetadataString ¶
ParseMetadataString is like ParseMetadata, but for a string rather than an io.Reader.
type MetadataBuilder ¶
type MetadataBuilder struct {
// contains filtered or unexported fields
}
MetadataBuilder is a struct used for building and updating Iceberg table metadata.
It keeps track of applied changes in the `updates` field. This can be used to commit changes made to a table to the catalog.
func MetadataBuilderFromBase ¶
func MetadataBuilderFromBase(metadata Metadata, currentFileLocation string) (*MetadataBuilder, error)
MetadataBuilderFromBase creates a MetadataBuilder from an existing Metadata object. currentFileLocation is the location where the current version of the metadata file is stored. This is used to update the metadata log. If currentFileLocation is empty, the metadata log will not be updated. This should only be used to stage-create tables.
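A sketch of evolving existing metadata through the builder (assuming tbl is a loaded *table.Table; the property value is illustrative):

builder, err := table.MetadataBuilderFromBase(tbl.Metadata(), tbl.MetadataLocation())
if err != nil {
	log.Fatal(err)
}
if err := builder.SetProperties(iceberg.Properties{"owner": "analytics"}); err != nil {
	log.Fatal(err)
}

fmt.Println(builder.HasChanges()) // true
meta, err := builder.Build()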
func NewMetadataBuilder ¶
func NewMetadataBuilder(formatVersion int) (*MetadataBuilder, error)
func (*MetadataBuilder) AddPartitionSpec ¶
func (b *MetadataBuilder) AddPartitionSpec(spec *iceberg.PartitionSpec, initial bool) error
func (*MetadataBuilder) AddSchema ¶
func (b *MetadataBuilder) AddSchema(schema *iceberg.Schema) error
func (*MetadataBuilder) AddSnapshot ¶
func (b *MetadataBuilder) AddSnapshot(snapshot *Snapshot) error
func (*MetadataBuilder) AddSortOrder ¶
func (b *MetadataBuilder) AddSortOrder(sortOrder *SortOrder) error
func (*MetadataBuilder) AppendMetadataLog ¶
func (b *MetadataBuilder) AppendMetadataLog(entry MetadataLogEntry) *MetadataBuilder
func (*MetadataBuilder) Build ¶
func (b *MetadataBuilder) Build() (Metadata, error)
func (*MetadataBuilder) CurrentSchema ¶
func (b *MetadataBuilder) CurrentSchema() *iceberg.Schema
func (*MetadataBuilder) CurrentSpec ¶
func (b *MetadataBuilder) CurrentSpec() (*iceberg.PartitionSpec, error)
func (*MetadataBuilder) GetSchemaByID ¶
func (b *MetadataBuilder) GetSchemaByID(id int) (*iceberg.Schema, error)
func (*MetadataBuilder) GetSortOrderByID ¶
func (b *MetadataBuilder) GetSortOrderByID(id int) (*SortOrder, error)
func (*MetadataBuilder) GetSpecByID ¶
func (b *MetadataBuilder) GetSpecByID(id int) (*iceberg.PartitionSpec, error)
func (*MetadataBuilder) HasChanges ¶
func (b *MetadataBuilder) HasChanges() bool
func (*MetadataBuilder) LastUpdatedMS ¶
func (b *MetadataBuilder) LastUpdatedMS() int64
func (*MetadataBuilder) NameMapping ¶
func (b *MetadataBuilder) NameMapping() iceberg.NameMapping
func (*MetadataBuilder) NextRowID ¶
func (b *MetadataBuilder) NextRowID() int64
NextRowID returns the next available row ID (for v3 row lineage). For v1/v2 returns 0.
func (*MetadataBuilder) RemovePartitionSpecs ¶
func (b *MetadataBuilder) RemovePartitionSpecs(ints []int) error
func (*MetadataBuilder) RemoveProperties ¶
func (b *MetadataBuilder) RemoveProperties(keys []string) error
func (*MetadataBuilder) RemoveSchemas ¶
func (b *MetadataBuilder) RemoveSchemas(ints []int) error
func (*MetadataBuilder) RemoveSnapshotRef ¶
func (b *MetadataBuilder) RemoveSnapshotRef(name string) error
func (*MetadataBuilder) RemoveSnapshots ¶
func (b *MetadataBuilder) RemoveSnapshots(snapshotIds []int64) error
func (*MetadataBuilder) SetCurrentSchemaID ¶
func (b *MetadataBuilder) SetCurrentSchemaID(currentSchemaID int) error
func (*MetadataBuilder) SetDefaultSortOrderID ¶
func (b *MetadataBuilder) SetDefaultSortOrderID(defaultSortOrderID int) error
func (*MetadataBuilder) SetDefaultSpecID ¶
func (b *MetadataBuilder) SetDefaultSpecID(defaultSpecID int) error
func (*MetadataBuilder) SetFormatVersion ¶
func (b *MetadataBuilder) SetFormatVersion(formatVersion int) error
func (*MetadataBuilder) SetLastUpdatedMS ¶
func (b *MetadataBuilder) SetLastUpdatedMS() *MetadataBuilder
func (*MetadataBuilder) SetLoc ¶
func (b *MetadataBuilder) SetLoc(loc string) error
func (*MetadataBuilder) SetProperties ¶
func (b *MetadataBuilder) SetProperties(props iceberg.Properties) error
func (*MetadataBuilder) SetSnapshotRef ¶
func (b *MetadataBuilder) SetSnapshotRef(
	name string,
	snapshotID int64,
	refType RefType,
	options ...setSnapshotRefOption,
) error
func (*MetadataBuilder) SnapshotByID ¶
func (b *MetadataBuilder) SnapshotByID(id int64) (*Snapshot, error)
func (*MetadataBuilder) TrimMetadataLogs ¶
func (b *MetadataBuilder) TrimMetadataLogs(maxEntries int) *MetadataBuilder
type MetadataLogEntry ¶
type Operation ¶
type Operation string
func ValidOperation ¶
ValidOperation ensures that a given string is one of the valid operation types: append, replace, overwrite, delete.
type OrphanCleanupOption ¶
type OrphanCleanupOption func(*orphanCleanupConfig)
func WithDeleteFunc ¶
func WithDeleteFunc(deleteFunc func(string) error) OrphanCleanupOption
WithDeleteFunc sets a custom delete function. If not provided, the table's FileIO delete method will be used.
func WithDryRun ¶
func WithDryRun(enabled bool) OrphanCleanupOption
func WithEqualAuthorities ¶
func WithEqualAuthorities(authorities map[string]string) OrphanCleanupOption
WithEqualAuthorities specifies authorities that should be considered equivalent. For example, map["endpoint1.s3.amazonaws.com,endpoint2.s3.amazonaws.com"] = "s3.amazonaws.com" treats different S3 endpoints as equivalent. The key can be a comma-separated list.
func WithEqualSchemes ¶
func WithEqualSchemes(schemes map[string]string) OrphanCleanupOption
WithEqualSchemes specifies schemes that should be considered equivalent. For example, map["s3,s3a,s3n"] = "s3" treats all S3 scheme variants as equivalent. The key can be a comma-separated list of schemes that map to the value scheme.
func WithFilesOlderThan ¶
func WithFilesOlderThan(duration time.Duration) OrphanCleanupOption
func WithLocation ¶
func WithLocation(location string) OrphanCleanupOption
func WithMaxConcurrency ¶
func WithMaxConcurrency(maxWorkers int) OrphanCleanupOption
WithMaxConcurrency sets the maximum number of goroutines for parallel deletion. Defaults to a reasonable number based on the system. Only used when deleteFunc is nil or when the FileIO doesn't support bulk operations.
func WithPrefixMismatchMode ¶
func WithPrefixMismatchMode(mode PrefixMismatchMode) OrphanCleanupOption
WithPrefixMismatchMode sets how to handle situations when metadata references files that match listed files except for authority/scheme differences.
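A sketch of a conservative orphan-file sweep using these options (a dry run, assuming tbl is a loaded *table.Table and ctx is a context.Context):

result, err := tbl.DeleteOrphanFiles(ctx,
	table.WithDryRun(true),                 // report candidates without deleting
	table.WithFilesOlderThan(72*time.Hour), // ignore recently written files
	table.WithEqualSchemes(map[string]string{"s3a,s3n": "s3"}),
	table.WithPrefixMismatchMode(table.PrefixMismatchIgnore),
)
if err != nil {
	log.Fatal(err)
}
fmt.Printf("%+v\n", result)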
type OrphanCleanupResult ¶
type OverwriteOption ¶
type OverwriteOption func(op *overwriteOperation)
OverwriteOption applies options to overwrite operations
func WithOverwriteCaseInsensitive ¶
func WithOverwriteCaseInsensitive() OverwriteOption
WithOverwriteCaseInsensitive changes the binding of the filter to be case insensitive (the default is case sensitive). Note that the sensitivity applies only to the field name and not to the evaluation of the literals on string fields.
func WithOverwriteConcurrency ¶
func WithOverwriteConcurrency(concurrency int) OverwriteOption
WithOverwriteConcurrency overrides the default concurrency for overwrite operations. Default: runtime.GOMAXPROCS(0)
func WithOverwriteFilter ¶
func WithOverwriteFilter(filter iceberg.BooleanExpression) OverwriteOption
WithOverwriteFilter overrides the default deletion filter on overwrite operations. Default: iceberg.AlwaysTrue
type PartitionStatisticsFile ¶
type PartitionStatisticsFile struct {
SnapshotID int64 `json:"snapshot-id"`
StatisticsPath string `json:"statistics-path"`
FileSizeInBytes int64 `json:"file-size-in-bytes"`
}
PartitionStatisticsFile represents a partition statistics file that can be used to read table data more efficiently.
Statistics are informational. A reader can choose to ignore statistics information. Statistics support is not required to read the table correctly.
type PrefixMismatchMode ¶
type PrefixMismatchMode int
PrefixMismatchMode defines how to handle cases where candidate files have different URI schemes or authorities compared to table location during orphan cleanup. This is useful when files may be referenced using different but equivalent schemes (e.g., s3:// vs s3a:// vs s3n://) or when cleaning up files across different locations.
const (
	// PrefixMismatchError causes cleanup to fail with an error when candidate files
	// have URI schemes/authorities that don't match the table location and are not
	// covered by configured equivalences. This is the safest default behavior.
	PrefixMismatchError PrefixMismatchMode = iota // default

	// PrefixMismatchIgnore skips candidate files that have mismatched URI schemes/authorities
	// without treating it as an error. Files are silently ignored and not considered for deletion.
	PrefixMismatchIgnore

	// PrefixMismatchDelete treats candidate files with mismatched URI schemes/authorities
	// as orphans and includes them for deletion. Use with caution as this may delete
	// files from unexpected locations.
	PrefixMismatchDelete
)
func (PrefixMismatchMode) String ¶
func (p PrefixMismatchMode) String() string
type ReassignedIds ¶
type ReassignedIds struct {
// contains filtered or unexported fields
}
type Requirement ¶
type Requirement interface {
// Validate checks that the current table metadata satisfies the requirement.
Validate(Metadata) error
GetType() string
}
A Requirement is a validation rule that must be satisfied before attempting to make and commit changes to a table. Requirements are used to ensure that the table is in a valid state before making changes.
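For example, a commit that must only succeed if the table identity and schema have not changed underneath it might carry requirements like these (a sketch, assuming tbl is a loaded *table.Table; the same requirements would accompany the updates passed to a CatalogIO's CommitTable):

reqs := []table.Requirement{
	table.AssertTableUUID(tbl.Metadata().TableUUID()),
	table.AssertCurrentSchemaID(tbl.Schema().ID),
}
for _, req := range reqs {
	if err := req.Validate(tbl.Metadata()); err != nil {
		log.Fatal(err) // the table changed since it was read
	}
}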
func AssertCreate ¶
func AssertCreate() Requirement
AssertCreate creates a requirement that the table does not already exist.
func AssertCurrentSchemaID ¶
func AssertCurrentSchemaID(id int) Requirement
AssertCurrentSchemaID creates a requirement that the table's current schema ID matches the given id.
func AssertDefaultSortOrderID ¶
func AssertDefaultSortOrderID(id int) Requirement
AssertDefaultSortOrderID creates a requirement that the table's default sort order ID matches the given id.
func AssertDefaultSpecID ¶
func AssertDefaultSpecID(id int) Requirement
AssertDefaultSpecID creates a requirement that the table's default partition spec ID matches the given id.
func AssertLastAssignedFieldID ¶
func AssertLastAssignedFieldID(id int) Requirement
AssertLastAssignedFieldID validates that the table's last assigned column ID matches the given id.
func AssertLastAssignedPartitionID ¶
func AssertLastAssignedPartitionID(id int) Requirement
AssertLastAssignedPartitionID creates a requirement that the table's last assigned partition ID matches the given id.
func AssertRefSnapshotID ¶
func AssertRefSnapshotID(ref string, id *int64) Requirement
AssertRefSnapshotID creates a requirement which ensures that the table branch or tag identified by the given ref must reference the given snapshot id. If the id is nil, the ref must not already exist.
func AssertTableUUID ¶
func AssertTableUUID(uuid uuid.UUID) Requirement
AssertTableUUID creates a requirement that the table UUID matches the given UUID.
func ParseRequirement ¶
func ParseRequirement(r io.Reader) (Requirement, error)
ParseRequirement parses json data provided by the reader into a Requirement
func ParseRequirementBytes ¶
func ParseRequirementBytes(b []byte) (Requirement, error)
ParseRequirementBytes parses json bytes into a Requirement
func ParseRequirementString ¶
func ParseRequirementString(s string) (Requirement, error)
ParseRequirementString parses json string into a Requirement
type Requirements ¶
type Requirements []Requirement
func (*Requirements) UnmarshalJSON ¶
func (r *Requirements) UnmarshalJSON(data []byte) error
type RollingDataWriter ¶
type RollingDataWriter struct {
// contains filtered or unexported fields
}
RollingDataWriter accumulates Arrow records for a specific partition and flushes them to data files when the target file size is reached, implementing a rolling file strategy to manage file sizes.
func (*RollingDataWriter) Add ¶
func (r *RollingDataWriter) Add(record arrow.RecordBatch) error
Add appends a record to the writer's buffer and flushes to a data file if the target file size is reached.
type Scan ¶
type Scan struct {
// contains filtered or unexported fields
}
func (*Scan) PlanFiles ¶
func (scan *Scan) PlanFiles(ctx context.Context) ([]FileScanTask, error)
PlanFiles orchestrates the fetching and filtering of manifests, and then building a list of FileScanTasks that match the current Scan criteria.
func (*Scan) ToArrowRecords ¶
func (scan *Scan) ToArrowRecords(ctx context.Context) (*arrow.Schema, iter.Seq2[arrow.RecordBatch, error], error)
ToArrowRecords returns the arrow schema of the expected records and an iterator that can be used with a range expression to read the records as they become available. If an error is encountered during planning and setup, this will return the error directly. If an error occurs while iterating the records, it will be returned by the iterator.
The purpose for returning the schema up front is to handle the case where there are no rows returned. The resulting Arrow Schema of the projection will still be known.
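A sketch of consuming the iterator with a range expression (assuming tbl is a loaded *table.Table; the column names are illustrative):

arrowSchema, itr, err := tbl.Scan(table.WithSelectedFields("id", "name")).ToArrowRecords(ctx)
if err != nil {
	log.Fatal(err) // planning/setup error
}
fmt.Println(arrowSchema) // known even when zero rows are returned

for rec, err := range itr {
	if err != nil {
		log.Fatal(err) // error surfaced while iterating
	}
	fmt.Println(rec.NumRows())
	rec.Release()
}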
func (*Scan) ToArrowTable ¶
ToArrowTable calls ToArrowRecords and then gathers all of the records together, returning an arrow.Table made from those records.
func (*Scan) UseRowLimit ¶
type ScanOption ¶
type ScanOption func(*Scan)
func WitMaxConcurrency ¶
func WitMaxConcurrency(n int) ScanOption
WitMaxConcurrency sets the maximum concurrency for table scan and plan operations. When unset it defaults to runtime.GOMAXPROCS(0).
func WithCaseSensitive ¶
func WithCaseSensitive(b bool) ScanOption
func WithLimit ¶
func WithLimit(n int64) ScanOption
func WithOptions ¶
func WithOptions(opts iceberg.Properties) ScanOption
func WithRowFilter ¶
func WithRowFilter(e iceberg.BooleanExpression) ScanOption
func WithSelectedFields ¶
func WithSelectedFields(fields ...string) ScanOption
func WithSnapshotAsOf ¶
func WithSnapshotAsOf(timeStampMs int64) ScanOption
func WithSnapshotID ¶
func WithSnapshotID(n int64) ScanOption
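A sketch combining several of these options into a time-travel scan (the column names are illustrative; iceberg.EqualTo and iceberg.Reference come from the parent package's expression API):

scan := tbl.Scan(
	table.WithRowFilter(iceberg.EqualTo(iceberg.Reference("region"), "eu")),
	table.WithSelectedFields("id", "region"),
	table.WithLimit(1000),
	table.WithSnapshotAsOf(time.Now().Add(-24*time.Hour).UnixMilli()),
)
tasks, err := scan.PlanFiles(ctx)
if err != nil {
	log.Fatal(err)
}
fmt.Println(len(tasks)) // files matching the filter at that point in time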
type SequenceNumberValidator ¶
type SequenceNumberValidator interface {
LastSequenceNumber() int64
}
SequenceNumberValidator defines an interface for types that can validate sequence numbers
type Snapshot ¶
type Snapshot struct {
SnapshotID int64 `json:"snapshot-id"`
ParentSnapshotID *int64 `json:"parent-snapshot-id,omitempty"`
SequenceNumber int64 `json:"sequence-number"`
TimestampMs int64 `json:"timestamp-ms"`
ManifestList string `json:"manifest-list,omitempty"`
Summary *Summary `json:"summary,omitempty"`
SchemaID *int `json:"schema-id,omitempty"`
FirstRowID *int64 `json:"first-row-id,omitempty"` // V3: Starting row ID for this snapshot
AddedRows *int64 `json:"added-rows,omitempty"` // V3: Number of rows added by this snapshot
}
func (Snapshot) ValidateRowLineage ¶
type SnapshotLogEntry ¶
type SnapshotRef ¶
type SnapshotRef struct {
SnapshotID int64 `json:"snapshot-id"`
SnapshotRefType RefType `json:"type"`
MinSnapshotsToKeep *int `json:"min-snapshots-to-keep,omitempty"`
MaxSnapshotAgeMs *int64 `json:"max-snapshot-age-ms,omitempty"`
MaxRefAgeMs *int64 `json:"max-ref-age-ms,omitempty"`
}
SnapshotRef represents the reference information for a specific snapshot
func (*SnapshotRef) Equals ¶
func (s *SnapshotRef) Equals(rhs SnapshotRef) bool
func (*SnapshotRef) UnmarshalJSON ¶
func (s *SnapshotRef) UnmarshalJSON(b []byte) error
type SnapshotSummaryCollector ¶
type SnapshotSummaryCollector struct {
// contains filtered or unexported fields
}
type SortDirection ¶
type SortDirection string
const (
	SortASC  SortDirection = "asc"
	SortDESC SortDirection = "desc"
)
type SortField ¶
type SortField struct {
	// SourceID is the source column id from the table's schema.
	SourceID int `json:"source-id"`
	// Transform is the transformation used to produce values to be
	// sorted on from the source column.
	Transform iceberg.Transform `json:"transform"`
	// Direction is an enum indicating ascending or descending direction.
	Direction SortDirection `json:"direction"`
	// NullOrder describes the order of null values when sorting;
	// it must be either the nulls-first or nulls-last enum value.
	NullOrder NullOrder `json:"null-order"`
}
SortField describes a field used in a sort order definition.
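For illustration, a field sorting by the identity transform of column 2, ascending with nulls first (a sketch; the NullsFirst constant name is assumed from the package's nulls-first null-order value):

field := table.SortField{
	SourceID:  2,                           // column id in the table's schema
	Transform: iceberg.IdentityTransform{}, // sort on the raw column value
	Direction: table.SortASC,
	NullOrder: table.NullsFirst, // assumed NullOrder constant
}
fmt.Println(field)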
func (*SortField) MarshalJSON ¶
func (*SortField) UnmarshalJSON ¶
type SortOrder ¶
type SortOrder struct {
// contains filtered or unexported fields
}
SortOrder describes how the data is sorted within the table.
Data can be sorted within partitions by columns to gain performance. The order of the sort fields within the list defines the order in which the sort is applied to the data.
func AssignFreshSortOrderIDs ¶
AssignFreshSortOrderIDs updates and reassigns the field source IDs from the old schema to the corresponding fields in the fresh schema, while also giving the Sort Order a fresh ID of 1 (the initial Sort Order ID).
func AssignFreshSortOrderIDsWithID ¶
func AssignFreshSortOrderIDsWithID(sortOrder SortOrder, old, fresh *iceberg.Schema, sortOrderID int) (SortOrder, error)
AssignFreshSortOrderIDsWithID is like AssignFreshSortOrderIDs but allows specifying the id of the returned SortOrder.
func NewSortOrder ¶
NewSortOrder creates a new SortOrder.
The orderID must be greater than or equal to 0. If orderID is 0, no fields can be passed; this is equivalent to UnsortedSortOrder. Fields need to have a non-nil Transform and valid Direction and NullOrder values.
func (*SortOrder) CheckCompatibility ¶
func (SortOrder) IsUnsorted ¶
func (SortOrder) MarshalJSON ¶
func (*SortOrder) UnmarshalJSON ¶
type StagedTable ¶
type StagedTable struct {
*Table
}
func (*StagedTable) Scan ¶
func (s *StagedTable) Scan(opts ...ScanOption) *Scan
type StatisticsFile ¶
type StatisticsFile struct {
SnapshotID int64 `json:"snapshot-id"`
StatisticsPath string `json:"statistics-path"`
FileSizeInBytes int64 `json:"file-size-in-bytes"`
KeyMetadata *string `json:"key-metadata,omitempty"`
BlobMetadata []BlobMetadata `json:"blob-metadata"`
}
StatisticsFile represents a statistics file in the Puffin format, that can be used to read table data more efficiently.
Statistics are informational. A reader can choose to ignore statistics information. Statistics support is not required to read the table correctly.
type Summary ¶
type Summary struct {
Operation Operation
Properties iceberg.Properties
}
Summary stores the summary information for a snapshot indicating the operation that created the snapshot, and various properties which might exist in the summary.
func (*Summary) MarshalJSON ¶
func (*Summary) UnmarshalJSON ¶
type Table ¶
type Table struct {
// contains filtered or unexported fields
}
func NewFromLocation ¶
func (Table) AllManifests ¶
func (Table) Append ¶
func (t Table) Append(ctx context.Context, rdr array.RecordReader, snapshotProps iceberg.Properties) (*Table, error)
Append is a shortcut for NewTransaction().Append() and then committing the transaction
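A sketch of appending a batch built in memory (assuming tbl is a loaded *table.Table whose first column is an int64 field; builder and reader names follow arrow-go v18, where newer releases also expose arrow.Record under the name arrow.RecordBatch):

arrowSchema, err := table.SchemaToArrowSchema(tbl.Schema(), nil, true, false)
if err != nil {
	log.Fatal(err)
}

bldr := array.NewRecordBuilder(memory.DefaultAllocator, arrowSchema)
defer bldr.Release()
bldr.Field(0).(*array.Int64Builder).AppendValues([]int64{1, 2, 3}, nil)

rec := bldr.NewRecord() // arrow.Record / arrow.RecordBatch
defer rec.Release()

rdr, err := array.NewRecordReader(arrowSchema, []arrow.Record{rec})
if err != nil {
	log.Fatal(err)
}
defer rdr.Release()

updated, err := tbl.Append(ctx, rdr, nil)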
func (Table) AppendTable ¶
func (t Table) AppendTable(ctx context.Context, tbl arrow.Table, batchSize int64, snapshotProps iceberg.Properties) (*Table, error)
AppendTable is a shortcut for NewTransaction().AppendTable() and then committing the transaction
func (Table) CurrentSnapshot ¶
func (Table) Delete ¶
func (t Table) Delete(ctx context.Context, filter iceberg.BooleanExpression, snapshotProps iceberg.Properties, opts ...DeleteOption) (*Table, error)
Delete is a shortcut for NewTransaction().Delete() and then committing the transaction.
The provided filter acts as a row-level predicate on existing data:
- Files where all rows match the filter (strict match) are completely deleted
- Files where some rows match and others don't (partial match) are rewritten to keep only non-matching rows
- Files where no rows match the filter are kept unchanged
The filter uses both inclusive and strict metrics evaluators on file statistics to classify files:
- Inclusive evaluator identifies candidate files that may contain matching rows
- Strict evaluator determines if all rows in a file must match the filter
- Files that pass inclusive but not strict evaluation are rewritten with filtered data
The concurrency parameter controls the level of parallelism for manifest processing and file rewriting and can be overridden using the WithDeleteConcurrency option. Defaults to runtime.GOMAXPROCS(0).
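A sketch of a filtered delete (the column name and literal are illustrative; iceberg.LessThan comes from the parent package's expression API):

updated, err := tbl.Delete(ctx,
	iceberg.LessThan(iceberg.Reference("event_ts"), "2023-01-01T00:00:00"),
	nil, // no extra snapshot properties
	table.WithDeleteCaseInsensitive(),
)
if err != nil {
	log.Fatal(err)
}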
func (Table) DeleteOrphanFiles ¶
func (t Table) DeleteOrphanFiles(ctx context.Context, opts ...OrphanCleanupOption) (OrphanCleanupResult, error)
func (Table) Identifier ¶
func (t Table) Identifier() Identifier
func (Table) LocationProvider ¶
func (t Table) LocationProvider() (LocationProvider, error)
func (Table) MetadataLocation ¶
func (Table) NameMapping ¶
func (t Table) NameMapping() iceberg.NameMapping
func (Table) NewTransaction ¶
func (t Table) NewTransaction() *Transaction
func (Table) Overwrite ¶
func (t Table) Overwrite(ctx context.Context, rdr array.RecordReader, snapshotProps iceberg.Properties, opts ...OverwriteOption) (*Table, error)
Overwrite is a shortcut for NewTransaction().Overwrite() and then committing the transaction.
An optional filter (see WithOverwriteFilter) determines which existing data to delete or rewrite:
- If filter is nil or AlwaysTrue, all existing data files are deleted and replaced with new data.
- If a filter is provided, it acts as a row-level predicate on existing data:
- Files where all rows match the filter (strict match) are completely deleted
- Files where some rows match and others don't (partial match) are rewritten to keep only non-matching rows
- Files where no rows match the filter are kept unchanged
The filter uses both inclusive and strict metrics evaluators on file statistics to classify files:
- Inclusive evaluator identifies candidate files that may contain matching rows
- Strict evaluator determines if all rows in a file must match the filter
- Files that pass inclusive but not strict evaluation are rewritten with filtered data
New data from the provided RecordReader is written to the table regardless of the filter.
The concurrency parameter controls the level of parallelism for manifest processing and file rewriting and can be overridden using the WithOverwriteConcurrency option. Defaults to runtime.GOMAXPROCS(0).
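A sketch of a partition-style overwrite that replaces one day of data (the column name and value are illustrative; rdr is an array.RecordReader supplying the replacement rows):

updated, err := tbl.Overwrite(ctx, rdr, nil,
	table.WithOverwriteFilter(iceberg.EqualTo(iceberg.Reference("day"), "2024-01-01")),
)
if err != nil {
	log.Fatal(err)
}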
func (Table) OverwriteTable ¶
func (t Table) OverwriteTable(ctx context.Context, tbl arrow.Table, batchSize int64, snapshotProps iceberg.Properties, opts ...OverwriteOption) (*Table, error)
OverwriteTable is a shortcut for NewTransaction().OverwriteTable() and then committing the transaction.
An optional filter (see WithOverwriteFilter) determines which existing data to delete or rewrite:
- If filter is nil or AlwaysTrue, all existing data files are deleted and replaced with new data.
- If a filter is provided, it acts as a row-level predicate on existing data:
  - Files where all rows match the filter (strict match) are completely deleted
  - Files where some rows match and others don't (partial match) are rewritten to keep only non-matching rows
  - Files where no rows match the filter are kept unchanged
The filter uses both inclusive and strict metrics evaluators on file statistics to classify files:
- Inclusive evaluator identifies candidate files that may contain matching rows
- Strict evaluator determines if all rows in a file must match the filter
- Files that pass inclusive but not strict evaluation are rewritten with filtered data
New data from the provided table is written to the table regardless of the filter.
The batchSize parameter is the batch size used when reading the input data, not the batch size for writes. Concurrency for manifest processing and file rewriting defaults to runtime.GOMAXPROCS(0) and can be overridden with the WithOverwriteConcurrency option.
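The same flow with an in-memory Arrow table; a sketch where arrTbl is an illustrative arrow.Table and the 1024-row read batch size is an arbitrary choice:
updated, err := tbl.OverwriteTable(ctx, arrTbl, 1024, nil) // no filter: replaces all existing data
if err != nil {
    return err
}
tbl = updated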
func (Table) Properties ¶
func (t Table) Properties() iceberg.Properties
func (Table) Scan ¶
func (t Table) Scan(opts ...ScanOption) *Scan
func (Table) SnapshotAsOf ¶
SnapshotAsOf finds the snapshot that was current as of or right before the given timestamp.
func (Table) SnapshotByID ¶
func (Table) SnapshotByName ¶
func (Table) Spec ¶
func (t Table) Spec() iceberg.PartitionSpec
type Transaction ¶
type Transaction struct {
// contains filtered or unexported fields
}
func (*Transaction) AddDataFiles ¶
func (t *Transaction) AddDataFiles(ctx context.Context, dataFiles []iceberg.DataFile, snapshotProps iceberg.Properties) error
AddDataFiles adds pre-built DataFiles to the table without scanning them from storage. This is useful for clients who have already constructed DataFile objects with metadata, avoiding the need to read files to extract schema and statistics.
Unlike AddFiles, this method does not read files from storage. It validates only metadata that can be checked without opening files (for example spec-id and partition field IDs).
Callers are responsible for ensuring each DataFile is valid and consistent with the table. Supplying incorrect DataFile metadata can produce an invalid snapshot and break reads.
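A minimal sketch, assuming dataFiles is a pre-built []iceberg.DataFile (an illustrative name) whose metadata is accurate for the table's current spec:
txn := tbl.NewTransaction()
if err := txn.AddDataFiles(ctx, dataFiles, nil); err != nil {
    return err
}
if _, err := txn.Commit(ctx); err != nil {
    return err
}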
func (*Transaction) AddFiles ¶
func (t *Transaction) AddFiles(ctx context.Context, files []string, snapshotProps iceberg.Properties, ignoreDuplicates bool) error
func (*Transaction) Append ¶
func (t *Transaction) Append(ctx context.Context, rdr array.RecordReader, snapshotProps iceberg.Properties) error
func (*Transaction) AppendTable ¶
func (t *Transaction) AppendTable(ctx context.Context, tbl arrow.Table, batchSize int64, snapshotProps iceberg.Properties) error
func (*Transaction) Delete ¶
func (t *Transaction) Delete(ctx context.Context, filter iceberg.BooleanExpression, snapshotProps iceberg.Properties, opts ...DeleteOption) (err error)
Delete deletes records matching the provided filter.
The provided filter acts as a row-level predicate on existing data:
- Files where all rows match the filter (strict match) are completely deleted
- Files where some rows match and others don't (partial match) are rewritten to keep only non-matching rows
- Files where no rows match the filter are kept unchanged
The filter uses both inclusive and strict metrics evaluators on file statistics to classify files:
- Inclusive evaluator identifies candidate files that may contain matching rows
- Strict evaluator determines if all rows in a file must match the filter
- Files that pass inclusive but not strict evaluation are rewritten with filtered data
Concurrency for manifest processing and file rewriting defaults to runtime.GOMAXPROCS(0) and can be overridden with the WithOverwriteConcurrency option.
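Within a transaction, the delete can be batched with other changes before a single commit. A hedged sketch (the column name and snapshot property key are illustrative):
txn := tbl.NewTransaction()
if err := txn.Delete(ctx,
    iceberg.LessThan(iceberg.Reference("id"), int64(1000)),
    iceberg.Properties{"note": "purge legacy ids"}); err != nil {
    return err
}
if _, err := txn.Commit(ctx); err != nil {
    return err
}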
func (*Transaction) ExpireSnapshots ¶
func (t *Transaction) ExpireSnapshots(opts ...ExpireSnapshotsOpt) error
func (*Transaction) Overwrite ¶
func (t *Transaction) Overwrite(ctx context.Context, rdr array.RecordReader, snapshotProps iceberg.Properties, opts ...OverwriteOption) error
Overwrite overwrites the table data using a RecordReader.
An optional filter (see WithOverwriteFilter) determines which existing data to delete or rewrite:
- If filter is nil or AlwaysTrue, all existing data files are deleted and replaced with new data.
- If a filter is provided, it acts as a row-level predicate on existing data:
  - Files where all rows match the filter (strict match) are completely deleted
  - Files where some rows match and others don't (partial match) are rewritten to keep only non-matching rows
  - Files where no rows match the filter are kept unchanged
The filter uses both inclusive and strict metrics evaluators on file statistics to classify files:
- Inclusive evaluator identifies candidate files that may contain matching rows
- Strict evaluator determines if all rows in a file must match the filter
- Files that pass inclusive but not strict evaluation are rewritten with filtered data
New data from the provided RecordReader is written to the table regardless of the filter.
Concurrency for manifest processing and file rewriting is set with the WithOverwriteConcurrency option; if the configured value is <= 0, it defaults to runtime.GOMAXPROCS(0).
func (*Transaction) OverwriteTable ¶
func (t *Transaction) OverwriteTable(ctx context.Context, tbl arrow.Table, batchSize int64, snapshotProps iceberg.Properties, opts ...OverwriteOption) error
OverwriteTable overwrites the table data using an Arrow Table.
An optional filter (see WithOverwriteFilter) determines which existing data to delete or rewrite:
- If filter is nil or AlwaysTrue, all existing data files are deleted and replaced with new data.
- If a filter is provided, it acts as a row-level predicate on existing data:
  - Files where all rows match the filter (strict match) are completely deleted
  - Files where some rows match and others don't (partial match) are rewritten to keep only non-matching rows
  - Files where no rows match the filter are kept unchanged
The filter uses both inclusive and strict metrics evaluators on file statistics to classify files:
- Inclusive evaluator identifies candidate files that may contain matching rows
- Strict evaluator determines if all rows in a file must match the filter
- Files that pass inclusive but not strict evaluation are rewritten with filtered data
New data from the provided table is written to the table regardless of the filter.
The batchSize parameter is the batch size used when reading the input data, not the batch size for writes. Concurrency for manifest processing and file rewriting is set with the WithOverwriteConcurrency option; if the configured value is <= 0, it defaults to runtime.GOMAXPROCS(0).
func (*Transaction) ReplaceDataFiles ¶
func (t *Transaction) ReplaceDataFiles(ctx context.Context, filesToDelete, filesToAdd []string, snapshotProps iceberg.Properties) error
ReplaceDataFiles is effectively an overwrite operation with multiple files deleted and added.
TODO: technically, this could be a REPLACE operation, but we aren't performing any validation here that there are no changes to the underlying data. A REPLACE operation is only valid if the data is exactly the same as in the previous snapshot.
For now, we'll keep using an overwrite operation.
func (*Transaction) ReplaceDataFilesWithDataFiles ¶
func (t *Transaction) ReplaceDataFilesWithDataFiles(ctx context.Context, filesToDelete, filesToAdd []iceberg.DataFile, snapshotProps iceberg.Properties) error
ReplaceDataFilesWithDataFiles replaces files using pre-built DataFile objects. This avoids scanning files to extract schema and statistics: the caller provides DataFile objects directly with all required metadata.
For the files to add, use iceberg.NewDataFileBuilder to construct DataFile objects with the appropriate metadata (path, record count, file size, partition values).
This method does not open files. It validates only metadata that can be checked without reading file contents.
Callers are responsible for ensuring each DataFile is valid and consistent with the table. Supplying incorrect DataFile metadata can produce an invalid snapshot and break reads.
This is useful when:
- Files are written via a separate I/O path and metadata is already known
- Avoiding file scanning improves performance or reliability
- Working with storage systems where immediate file reads may be unreliable
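A minimal sketch of the call shape (toRemove and toAdd are illustrative pre-built []iceberg.DataFile slices):
txn := tbl.NewTransaction()
if err := txn.ReplaceDataFilesWithDataFiles(ctx, toRemove, toAdd, nil); err != nil {
    return err
}
if _, err := txn.Commit(ctx); err != nil {
    return err
}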
func (*Transaction) Scan ¶
func (t *Transaction) Scan(opts ...ScanOption) (*Scan, error)
func (*Transaction) SetProperties ¶
func (t *Transaction) SetProperties(props iceberg.Properties) error
func (*Transaction) StagedTable ¶
func (t *Transaction) StagedTable() (*StagedTable, error)
func (*Transaction) UpdateSchema ¶
func (t *Transaction) UpdateSchema(caseSensitive bool, allowIncompatibleChanges bool, opts ...UpdateSchemaOption) *UpdateSchema
UpdateSchema creates a new UpdateSchema instance for managing schema changes within this transaction.
Parameters:
- caseSensitive: If true, field name lookups are case-sensitive; if false, field names are matched case-insensitively.
- allowIncompatibleChanges: If true, allows schema changes that would normally be rejected for being incompatible (e.g., adding required fields without default values, changing field types in non-promotable ways, or changing column nullability from optional to required).
- opts: Optional configuration functions to customize the UpdateSchema behavior.
Returns an UpdateSchema instance that can be used to build and apply schema changes.
func (*Transaction) UpdateSpec ¶
func (t *Transaction) UpdateSpec(caseSensitive bool) *UpdateSpec
type UnsupportedType ¶
type UnsupportedType struct {
MinFormatVersion int
}
type Update ¶
type Update interface {
// Action returns the name of the action that the update represents.
Action() string
// Apply applies the update to the given metadata builder.
Apply(*MetadataBuilder) error
// PostCommit is called after successful commit of the update
PostCommit(context.Context, *Table, *Table) error
}
Update represents a change to a table's metadata.
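As a sketch, updates built with the package's New*Update constructors can be applied to existing metadata through UpdateTableMetadata (base and metadataLoc are illustrative values; the "owner" property key is arbitrary):
upd := NewSetPropertiesUpdate(iceberg.Properties{"owner": "analytics"})
newMeta, err := UpdateTableMetadata(base, []Update{upd}, metadataLoc)
if err != nil {
    return err
}
_ = newMeta // metadata with the property applied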
func NewSetLocationUpdate ¶
NewSetLocationUpdate creates a new update that sets the location of the table metadata.
type UpdateSchema ¶
type UpdateSchema struct {
// contains filtered or unexported fields
}
UpdateSchema manages schema evolution operations within a transaction. It supports adding, deleting, renaming, updating, and reordering columns, and ensures all changes are validated before being committed.
Operations can be chained together and are applied in the order they are called. Changes are not persisted until Commit() is called.
Basic Usage:
txn := table.NewTransaction()
updateSchema := txn.UpdateSchema(true, false)

// Add a new column
updateSchema.AddColumn([]string{"email"}, iceberg.PrimitiveTypes.String, "Email address", false, nil)

// Commit the schema changes, then commit the transaction
if err := updateSchema.Commit(); err != nil {
    return err
}
if _, err := txn.Commit(ctx); err != nil {
    return err
}
Chaining Operations:
err := updateSchema.
    AddColumn([]string{"age"}, iceberg.PrimitiveTypes.Int, "User age", false, nil).
    RenameColumn([]string{"name"}, "full_name").
    MoveFirst([]string{"id"}).
    Commit()
if err != nil {
    return err
}
Adding Nested Columns:
// Add a column to a struct field
updateSchema.AddColumn([]string{"address", "country"}, iceberg.PrimitiveTypes.String, "Country code", false, iceberg.StringLiteral("US"))

// Commit the schema update
if err := updateSchema.Commit(); err != nil {
    return err
}
if _, err := txn.Commit(ctx); err != nil {
    return err
}
func NewUpdateSchema ¶
func NewUpdateSchema(txn *Transaction, caseSensitive bool, allowIncompatibleChanges bool, opts ...UpdateSchemaOption) *UpdateSchema
NewUpdateSchema creates a new UpdateSchema instance for managing schema changes within a transaction.
Parameters:
- txn: The transaction that this schema update will be applied to.
- caseSensitive: If true, field name lookups are case-sensitive; if false, field names are matched case-insensitively.
- allowIncompatibleChanges: If true, allows schema changes that would normally be rejected for being incompatible (e.g., adding required fields without default values, changing field types in non-promotable ways, or changing column nullability from optional to required).
- opts: Optional configuration functions to customize the UpdateSchema behavior.
Returns an UpdateSchema instance that can be used to build and apply schema changes.
func (*UpdateSchema) AddColumn ¶
func (u *UpdateSchema) AddColumn(path []string, fieldType iceberg.Type, doc string, required bool, defaultValue iceberg.Literal) *UpdateSchema
func (*UpdateSchema) BuildUpdates ¶
func (u *UpdateSchema) BuildUpdates() ([]Update, []Requirement, error)
func (*UpdateSchema) Commit ¶
func (u *UpdateSchema) Commit() error
func (*UpdateSchema) DeleteColumn ¶
func (u *UpdateSchema) DeleteColumn(path []string) *UpdateSchema
func (*UpdateSchema) MoveAfter ¶
func (u *UpdateSchema) MoveAfter(path, relativeTo []string) *UpdateSchema
func (*UpdateSchema) MoveBefore ¶
func (u *UpdateSchema) MoveBefore(path, relativeTo []string) *UpdateSchema
func (*UpdateSchema) MoveColumn ¶
func (u *UpdateSchema) MoveColumn(op MoveOp, path, relativeTo []string) *UpdateSchema
func (*UpdateSchema) MoveFirst ¶
func (u *UpdateSchema) MoveFirst(path []string) *UpdateSchema
func (*UpdateSchema) RenameColumn ¶
func (u *UpdateSchema) RenameColumn(path []string, newName string) *UpdateSchema
func (*UpdateSchema) SetIdentifierField ¶
func (u *UpdateSchema) SetIdentifierField(paths [][]string) *UpdateSchema
func (*UpdateSchema) UpdateColumn ¶
func (u *UpdateSchema) UpdateColumn(path []string, update ColumnUpdate) *UpdateSchema
type UpdateSchemaOption ¶
type UpdateSchemaOption func(*UpdateSchema)
UpdateSchemaOption is a functional option for configuring UpdateSchema.
func WithNameMapping ¶
func WithNameMapping(nameMapping iceberg.NameMapping) UpdateSchemaOption
WithNameMapping configures the UpdateSchema to use the provided name mapping for tracking field name changes and ensuring consistency during schema evolution.
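A minimal sketch (nm is an illustrative iceberg.NameMapping, for example one parsed from the table's schema.name-mapping.default property):
updateSchema := txn.UpdateSchema(true, false, WithNameMapping(nm))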
type UpdateSpec ¶
type UpdateSpec struct {
// contains filtered or unexported fields
}
UpdateSpec implements a builder for evolving a table's partition specification.
It accumulates a sequence of partition spec update operations (e.g., AddField, RemoveField, RenameField), which are applied during BuildUpdates.
Use the builder methods to chain operations, and call BuildUpdates to apply them and produce the final set of partition fields and update requirements, or call Commit to apply the updates in the transaction.
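For example, a hedged sketch that partitions by an identity column and by day (the column and partition field names are illustrative; iceberg.DayTransform is one of the standard transforms):
txn := tbl.NewTransaction()
err := txn.UpdateSpec(true).
    AddIdentity("region").
    AddField("event_ts", iceberg.DayTransform{}, "event_day").
    Commit()
if err != nil {
    return err
}
if _, err := txn.Commit(ctx); err != nil {
    return err
}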
func NewUpdateSpec ¶
func NewUpdateSpec(t *Transaction, caseSensitive bool) *UpdateSpec
func (*UpdateSpec) AddField ¶
func (us *UpdateSpec) AddField(sourceColName string, transform iceberg.Transform, partitionFieldName string) *UpdateSpec
func (*UpdateSpec) AddIdentity ¶
func (us *UpdateSpec) AddIdentity(sourceColName string) *UpdateSpec
func (*UpdateSpec) Apply ¶
func (us *UpdateSpec) Apply() (iceberg.PartitionSpec, error)
func (*UpdateSpec) BuildUpdates ¶
func (us *UpdateSpec) BuildUpdates() ([]Update, []Requirement, error)
func (*UpdateSpec) Commit ¶
func (us *UpdateSpec) Commit() error
func (*UpdateSpec) RemoveField ¶
func (us *UpdateSpec) RemoveField(name string) *UpdateSpec
func (*UpdateSpec) RenameField ¶
func (us *UpdateSpec) RenameField(name string, newName string) *UpdateSpec
type WriteTask ¶
type WriteTask struct {
Uuid uuid.UUID
ID int
PartitionID int // PartitionID is the partition identifier used in data file naming.
FileCount int // FileCount is a sequential counter for files written by this task.
Schema *iceberg.Schema
Batches []arrow.RecordBatch
SortOrderID int
}
func (WriteTask) GenerateDataFileName ¶
Source Files ¶
- arrow_scanner.go
- arrow_utils.go
- evaluators.go
- locations.go
- metadata.go
- metadata_schema_compatibility.go
- orphan_cleanup.go
- partitioned_fanout_writer.go
- pos_delete_partitioned_fanout_writer.go
- properties.go
- refs.go
- requirements.go
- rolling_data_writer.go
- scanner.go
- snapshot_producers.go
- snapshots.go
- sorting.go
- statistics.go
- table.go
- transaction.go
- update_schema.go
- update_spec.go
- updates.go
- writer.go