Documentation
¶
Index ¶
- Constants
- Variables
- func ArrowSchemaToIceberg(sc *arrow.Schema, downcastNsTimestamp bool, nameMapping iceberg.NameMapping) (*iceberg.Schema, error)
- func ArrowTypeToIceberg(dt arrow.DataType, downcastNsTimestamp bool) (iceberg.Type, error)
- func NewRemoveSnapshotRefUpdate(ref string) *removeSnapshotRefUpdate
- func NewSetPropertiesUpdate(updates iceberg.Properties) *setPropertiesUpdate
- func SchemaToArrowSchema(sc *iceberg.Schema, metadata map[string]string, ...) (*arrow.Schema, error)
- func ToRequestedSchema(ctx context.Context, requested, fileSchema *iceberg.Schema, batch arrow.Record, ...) (arrow.Record, error)
- func TypeToArrowType(t iceberg.Type, includeFieldIDs bool, useLargeTypes bool) (arrow.DataType, error)
- func VisitArrowSchema[T any](sc *arrow.Schema, visitor ArrowSchemaVisitor[T]) (res T, err error)
- func WithMaxRefAgeMs(maxRefAgeMs int64) setSnapshotRefOption
- func WithMaxSnapshotAgeMs(maxSnapshotAgeMs int64) setSnapshotRefOption
- func WithMinSnapshotsToKeep(minSnapshotsToKeep int) setSnapshotRefOption
- type ArrowSchemaVisitor
- type CatalogIO
- type FileScanTask
- type Identifier
- type LocationProvider
- type Metadata
- func NewMetadata(sc *iceberg.Schema, partitions *iceberg.PartitionSpec, sortOrder SortOrder, ...) (Metadata, error)
- func NewMetadataWithUUID(sc *iceberg.Schema, partitions *iceberg.PartitionSpec, sortOrder SortOrder, ...) (Metadata, error)
- func ParseMetadata(r io.Reader) (Metadata, error)
- func ParseMetadataBytes(b []byte) (Metadata, error)
- func ParseMetadataString(s string) (Metadata, error)
- type MetadataBuilder
- func (b *MetadataBuilder) AddPartitionSpec(spec *iceberg.PartitionSpec, initial bool) (*MetadataBuilder, error)
- func (b *MetadataBuilder) AddSchema(schema *iceberg.Schema, newLastColumnID int, initial bool) (*MetadataBuilder, error)
- func (b *MetadataBuilder) AddSnapshot(snapshot *Snapshot) (*MetadataBuilder, error)
- func (b *MetadataBuilder) AddSortOrder(sortOrder *SortOrder, initial bool) (*MetadataBuilder, error)
- func (b *MetadataBuilder) AppendMetadataLog(entry MetadataLogEntry) *MetadataBuilder
- func (b *MetadataBuilder) Build() (Metadata, error)
- func (b *MetadataBuilder) CurrentSchema() *iceberg.Schema
- func (b *MetadataBuilder) CurrentSpec() iceberg.PartitionSpec
- func (b *MetadataBuilder) GetSchemaByID(id int) (*iceberg.Schema, error)
- func (b *MetadataBuilder) GetSortOrderByID(id int) (*SortOrder, error)
- func (b *MetadataBuilder) GetSpecByID(id int) (*iceberg.PartitionSpec, error)
- func (b *MetadataBuilder) HasChanges() bool
- func (b *MetadataBuilder) LastUpdatedMS() int64
- func (b *MetadataBuilder) NameMapping() iceberg.NameMapping
- func (b *MetadataBuilder) RemoveProperties(keys []string) (*MetadataBuilder, error)
- func (b *MetadataBuilder) SetCurrentSchemaID(currentSchemaID int) (*MetadataBuilder, error)
- func (b *MetadataBuilder) SetDefaultSortOrderID(defaultSortOrderID int) (*MetadataBuilder, error)
- func (b *MetadataBuilder) SetDefaultSpecID(defaultSpecID int) (*MetadataBuilder, error)
- func (b *MetadataBuilder) SetFormatVersion(formatVersion int) (*MetadataBuilder, error)
- func (b *MetadataBuilder) SetLastUpdatedMS() *MetadataBuilder
- func (b *MetadataBuilder) SetLoc(loc string) (*MetadataBuilder, error)
- func (b *MetadataBuilder) SetProperties(props iceberg.Properties) (*MetadataBuilder, error)
- func (b *MetadataBuilder) SetSnapshotRef(name string, snapshotID int64, refType RefType, ...) (*MetadataBuilder, error)
- func (b *MetadataBuilder) SetUUID(uuid uuid.UUID) (*MetadataBuilder, error)
- func (b *MetadataBuilder) SnapshotByID(id int64) (*Snapshot, error)
- func (b *MetadataBuilder) TrimMetadataLogs(maxEntries int) *MetadataBuilder
- type MetadataLogEntry
- type NullOrder
- type Operation
- type RefType
- type Requirement
- func AssertCreate() Requirement
- func AssertCurrentSchemaID(id int) Requirement
- func AssertDefaultSortOrderID(id int) Requirement
- func AssertDefaultSpecID(id int) Requirement
- func AssertLastAssignedFieldID(id int) Requirement
- func AssertLastAssignedPartitionID(id int) Requirement
- func AssertRefSnapshotID(ref string, id *int64) Requirement
- func AssertTableUUID(uuid uuid.UUID) Requirement
- type Scan
- func (scan *Scan) PlanFiles(ctx context.Context) ([]FileScanTask, error)
- func (scan *Scan) Projection() (*iceberg.Schema, error)
- func (scan *Scan) Snapshot() *Snapshot
- func (scan *Scan) ToArrowRecords(ctx context.Context) (*arrow.Schema, iter.Seq2[arrow.Record, error], error)
- func (scan *Scan) ToArrowTable(ctx context.Context) (arrow.Table, error)
- func (scan *Scan) UseRef(name string) (*Scan, error)
- func (scan *Scan) UseRowLimit(n int64) *Scan
- type ScanOption
- func WitMaxConcurrency(n int) ScanOption
- func WithCaseSensitive(b bool) ScanOption
- func WithLimit(n int64) ScanOption
- func WithOptions(opts iceberg.Properties) ScanOption
- func WithRowFilter(e iceberg.BooleanExpression) ScanOption
- func WithSelectedFields(fields ...string) ScanOption
- func WithSnapshotID(n int64) ScanOption
- type Snapshot
- type SnapshotLogEntry
- type SnapshotRef
- type SnapshotSummaryCollector
- type SortDirection
- type SortField
- type SortOrder
- type StagedTable
- type Summary
- type Table
- func (t Table) AllManifests() iter.Seq2[iceberg.ManifestFile, error]
- func (t Table) CurrentSnapshot() *Snapshot
- func (t Table) Equals(other Table) bool
- func (t Table) FS() io.IO
- func (t Table) Identifier() Identifier
- func (t Table) Location() string
- func (t Table) LocationProvider() (LocationProvider, error)
- func (t Table) Metadata() Metadata
- func (t Table) MetadataLocation() string
- func (t Table) NameMapping() iceberg.NameMapping
- func (t Table) NewTransaction() *Transaction
- func (t Table) Properties() iceberg.Properties
- func (t Table) Scan(opts ...ScanOption) *Scan
- func (t Table) Schema() *iceberg.Schema
- func (t Table) Schemas() map[int]*iceberg.Schema
- func (t Table) SnapshotByID(id int64) *Snapshot
- func (t Table) SnapshotByName(name string) *Snapshot
- func (t Table) SortOrder() SortOrder
- func (t Table) Spec() iceberg.PartitionSpec
- type Transaction
- func (t *Transaction) AddFiles(files []string, snapshotProps iceberg.Properties, ignoreDuplicates bool) error
- func (t *Transaction) Append(rdr array.RecordReader, snapshotProps iceberg.Properties) error
- func (t *Transaction) Commit(ctx context.Context) (*Table, error)
- func (t *Transaction) Scan(opts ...ScanOption) (*Scan, error)
- func (t *Transaction) SetProperties(props iceberg.Properties) error
- func (t *Transaction) StagedTable() (*StagedTable, error)
- type Update
- func NewAddPartitionSpecUpdate(spec *iceberg.PartitionSpec, initial bool) Update
- func NewAddSchemaUpdate(schema *iceberg.Schema, lastColumnID int, initial bool) Update
- func NewAddSnapshotUpdate(snapshot *Snapshot) Update
- func NewAddSortOrderUpdate(sortOrder *SortOrder, initial bool) Update
- func NewAssignUUIDUpdate(uuid uuid.UUID) Update
- func NewRemovePropertiesUpdate(removals []string) Update
- func NewRemoveSnapshotsUpdate(ids []int64) Update
- func NewSetCurrentSchemaUpdate(id int) Update
- func NewSetDefaultSortOrderUpdate(id int) Update
- func NewSetDefaultSpecUpdate(id int) Update
- func NewSetLocationUpdate(loc string) Update
- func NewSetSnapshotRefUpdate(name string, snapshotID int64, refType RefType, ...) Update
- func NewUpgradeFormatVersionUpdate(formatVersion int) Update
Constants ¶
const ( ArrowFieldDocKey = "doc" // Arrow schemas that are generated from the Parquet library will utilize // this key to identify the field id of the source Parquet field. // We use this when converting to Iceberg to provide field IDs ArrowParquetFieldIDKey = "PARQUET:field_id" )
Constants to look for as keys in Arrow field metadata.
const ( WriteDataPathKey = "write.data.path" WriteMetadataPathKey = "write.metadata.path" WriteObjectStorePartitionedPathsKey = "write.object-storage.partitioned-paths" WriteObjectStorePartitionedPathsDefault = true ObjectStoreEnabledKey = "write.object-storage.enabled" ObjectStoreEnabledDefault = false DefaultNameMappingKey = "schema.name-mapping.default" MetricsModeColumnConfPrefix = "write.metadata.metrics.column" DefaultWriteMetricsModeKey = "write.metadata.metrics.default" DefaultWriteMetricsModeDefault = "truncate(16)" ManifestMergeEnabledKey = "commit.manifest-merge.enabled" ManifestMergeEnabledDefault = false ManifestTargetSizeBytesKey = "commit.manifest.target-size-bytes" ManifestTargetSizeBytesDefault = 8 * 1024 * 1024 // 8 MB ManifestMinMergeCountKey = "commit.manifest.min-count-to-merge" ManifestMinMergeCountDefault = 100 WritePartitionSummaryLimitKey = "write.summary.partition-limit" WritePartitionSummaryLimitDefault = 0 MetadataDeleteAfterCommitEnabledKey = "write.metadata.delete-after-commit.enabled" MetadataDeleteAfterCommitEnabledDefault = false MetadataPreviousVersionsMaxKey = "write.metadata.previous-versions-max" MetadataPreviousVersionsMaxDefault = 100 )
const ( InitialSortOrderID = 1 UnsortedSortOrderID = 0 )
const DefaultFormatVersion = 2
const MainBranch = "main"
const ScanNoLimit = -1
const (
ScanOptionArrowUseLargeTypes = "arrow.use_large_types"
)
Variables ¶
var ( ErrInvalidMetadataFormatVersion = errors.New("invalid or missing format-version in table metadata") ErrInvalidMetadata = errors.New("invalid metadata") )
var ( ErrInvalidOperation = errors.New("invalid operation value") ErrMissingOperation = errors.New("missing operation key") )
var ( ErrInvalidSortDirection = errors.New("invalid sort direction, must be 'asc' or 'desc'") ErrInvalidNullOrder = errors.New("invalid null order, must be 'nulls-first' or 'nulls-last'") )
var ErrInvalidRefType = errors.New("invalid snapshot ref type, should be 'branch' or 'tag'")
var UnsortedSortOrder = SortOrder{OrderID: UnsortedSortOrderID, Fields: []SortField{}}
A default Sort Order indicating no sort order at all
Functions ¶
func ArrowSchemaToIceberg ¶
func ArrowTypeToIceberg ¶
func NewRemoveSnapshotRefUpdate ¶ added in v0.2.0
func NewRemoveSnapshotRefUpdate(ref string) *removeSnapshotRefUpdate
NewRemoveSnapshotRefUpdate creates a new update that removes a snapshot reference from the table metadata.
func NewSetPropertiesUpdate ¶ added in v0.2.0
func NewSetPropertiesUpdate(updates iceberg.Properties) *setPropertiesUpdate
NewSetPropertiesUpdate creates a new update that sets the given properties in the table metadata.
func SchemaToArrowSchema ¶
func SchemaToArrowSchema(sc *iceberg.Schema, metadata map[string]string, includeFieldIDs, useLargeTypes bool) (*arrow.Schema, error)
SchemaToArrowSchema converts an Iceberg schema to an Arrow schema. If the metadata parameter is non-nil, it will be included as the top-level metadata in the schema. If includeFieldIDs is true, then each field of the schema will contain a metadata key PARQUET:field_id set to the field id from the iceberg schema.
func ToRequestedSchema ¶
func ToRequestedSchema(ctx context.Context, requested, fileSchema *iceberg.Schema, batch arrow.Record, downcastTimestamp, includeFieldIDs, useLargeTypes bool) (arrow.Record, error)
ToRequestedSchema constructs a new record batch matching the requested iceberg schema, casting columns where necessary.
func TypeToArrowType ¶
func TypeToArrowType(t iceberg.Type, includeFieldIDs bool, useLargeTypes bool) (arrow.DataType, error)
TypeToArrowType converts a given iceberg type into the equivalent Arrow data type. For nested fields (List, Struct, Map), if includeFieldIDs is true, then the child fields will contain a metadata key PARQUET:field_id set to the field id.
func VisitArrowSchema ¶
func VisitArrowSchema[T any](sc *arrow.Schema, visitor ArrowSchemaVisitor[T]) (res T, err error)
func WithMaxRefAgeMs ¶ added in v0.2.0
func WithMaxRefAgeMs(maxRefAgeMs int64) setSnapshotRefOption
func WithMaxSnapshotAgeMs ¶ added in v0.2.0
func WithMaxSnapshotAgeMs(maxSnapshotAgeMs int64) setSnapshotRefOption
func WithMinSnapshotsToKeep ¶ added in v0.2.0
func WithMinSnapshotsToKeep(minSnapshotsToKeep int) setSnapshotRefOption
Types ¶
type ArrowSchemaVisitor ¶
type ArrowSchemaVisitor[T any] interface { Schema(*arrow.Schema, T) T Struct(*arrow.StructType, []T) T Field(arrow.Field, T) T List(arrow.ListLikeType, T) T Map(mt *arrow.MapType, keyResult T, valueResult T) T Primitive(arrow.DataType) T }
ArrowSchemaVisitor is an interface that can be implemented and passed to VisitArrowSchema to traverse an Arrow schema.
type CatalogIO ¶ added in v0.2.0
type CatalogIO interface {
LoadTable(context.Context, Identifier, iceberg.Properties) (*Table, error)
CommitTable(context.Context, *Table, []Requirement, []Update) (Metadata, string, error)
}
type FileScanTask ¶
type Identifier ¶
type Identifier = []string
type LocationProvider ¶ added in v0.2.0
type LocationProvider interface {
NewTableMetadataFileLocation(newVersion int) (string, error)
NewMetadataLocation(metadataFileName string) string
}
func LoadLocationProvider ¶ added in v0.2.0
func LoadLocationProvider(tableLocation string, tableProps iceberg.Properties) (LocationProvider, error)
type Metadata ¶
type Metadata interface {
// Version indicates the version of this metadata, 1 for V1, 2 for V2, etc.
Version() int
// TableUUID returns a UUID that identifies the table, generated when the
// table is created. Implementations must throw an exception if a table's
// UUID does not match the expected UUID after refreshing metadata.
TableUUID() uuid.UUID
// Location is the table's base location. This is used by writers to determine
// where to store data files, manifest files, and table metadata files.
Location() string
// LastUpdatedMillis is the timestamp in milliseconds from the unix epoch when
// the table was last updated. Each table metadata file should update this
// field just before writing.
LastUpdatedMillis() int64
// LastColumnID returns the highest assigned column ID for the table.
// This is used to ensure fields are always assigned an unused ID when
// evolving schemas.
LastColumnID() int
// Schemas returns the list of schemas, stored as objects with their
// schema-id.
Schemas() []*iceberg.Schema
// CurrentSchema returns the table's current schema.
CurrentSchema() *iceberg.Schema
// PartitionSpecs returns the list of all partition specs in the table.
PartitionSpecs() []iceberg.PartitionSpec
// PartitionSpec returns the current partition spec that the table is using.
PartitionSpec() iceberg.PartitionSpec
// DefaultPartitionSpec is the ID of the current spec that writers should
// use by default.
DefaultPartitionSpec() int
// LastPartitionSpecID is the highest assigned partition field ID across
// all partition specs for the table. This is used to ensure partition
// fields are always assigned an unused ID when evolving specs.
LastPartitionSpecID() *int
// Snapshots returns the list of valid snapshots. Valid snapshots are
// snapshots for which all data files exist in the file system. A data
// file must not be deleted from the file system until the last snapshot
// in which it was listed is garbage collected.
Snapshots() []Snapshot
// SnapshotByID finds and returns a specific snapshot by its ID. Returns
// nil if the ID is not found in the list of snapshots.
SnapshotByID(int64) *Snapshot
// SnapshotByName searches the list of snapshots for a snapshot with a given
// ref name. Returns nil if there's no ref with this name for a snapshot.
SnapshotByName(name string) *Snapshot
// CurrentSnapshot returns the table's current snapshot.
CurrentSnapshot() *Snapshot
// Ref returns the snapshot ref for the main branch.
Ref() SnapshotRef
// Refs returns a list of snapshot name/reference pairs.
Refs() iter.Seq2[string, SnapshotRef]
// SnapshotLogs returns the list of snapshot logs for the table.
SnapshotLogs() iter.Seq[SnapshotLogEntry]
// SortOrder returns the table's current sort order, ie: the one with the
// ID that matches the default-sort-order-id.
SortOrder() SortOrder
// SortOrders returns the list of sort orders in the table.
SortOrders() []SortOrder
// DefaultSortOrder returns the ID of the current sort order that writers
// should use by default.
DefaultSortOrder() int
// Properties is a string to string map of table properties. This is used
// to control settings that affect reading and writing and is not intended
// to be used for arbitrary metadata. For example, commit.retry.num-retries
// is used to control the number of commit retries.
Properties() iceberg.Properties
// PreviousFiles returns the list of metadata log entries for the table.
PreviousFiles() iter.Seq[MetadataLogEntry]
Equals(Metadata) bool
NameMapping() iceberg.NameMapping
LastSequenceNumber() int64
}
Metadata for an iceberg table as specified in the Iceberg spec
https://iceberg.apache.org/spec/#iceberg-table-spec
func NewMetadata ¶ added in v0.2.0
func NewMetadata(sc *iceberg.Schema, partitions *iceberg.PartitionSpec, sortOrder SortOrder, location string, props iceberg.Properties) (Metadata, error)
NewMetadata creates a new table metadata object using the provided schema, partition spec, sort order, location, and properties, generating a fresh UUID for the new table metadata. By default, this will generate a V2 table metadata, but this can be modified by adding a "format-version" property to the props map. An error will be returned if the "format-version" property exists and is not a valid version number.
func NewMetadataWithUUID ¶ added in v0.2.0
func NewMetadataWithUUID(sc *iceberg.Schema, partitions *iceberg.PartitionSpec, sortOrder SortOrder, location string, props iceberg.Properties, tableUuid uuid.UUID) (Metadata, error)
NewMetadataWithUUID is like NewMetadata, but allows the caller to specify the UUID of the table rather than creating a new one.
func ParseMetadata ¶
ParseMetadata parses json metadata provided by the passed in reader, returning an error if one is encountered.
func ParseMetadataBytes ¶
ParseMetadataBytes is like ParseMetadataString but for a byte slice.
func ParseMetadataString ¶
ParseMetadataString is like ParseMetadata, but for a string rather than an io.Reader.
type MetadataBuilder ¶ added in v0.2.0
type MetadataBuilder struct {
// contains filtered or unexported fields
}
func MetadataBuilderFromBase ¶ added in v0.2.0
func MetadataBuilderFromBase(metadata Metadata) (*MetadataBuilder, error)
func NewMetadataBuilder ¶ added in v0.2.0
func NewMetadataBuilder() (*MetadataBuilder, error)
func (*MetadataBuilder) AddPartitionSpec ¶ added in v0.2.0
func (b *MetadataBuilder) AddPartitionSpec(spec *iceberg.PartitionSpec, initial bool) (*MetadataBuilder, error)
func (*MetadataBuilder) AddSchema ¶ added in v0.2.0
func (b *MetadataBuilder) AddSchema(schema *iceberg.Schema, newLastColumnID int, initial bool) (*MetadataBuilder, error)
func (*MetadataBuilder) AddSnapshot ¶ added in v0.2.0
func (b *MetadataBuilder) AddSnapshot(snapshot *Snapshot) (*MetadataBuilder, error)
func (*MetadataBuilder) AddSortOrder ¶ added in v0.2.0
func (b *MetadataBuilder) AddSortOrder(sortOrder *SortOrder, initial bool) (*MetadataBuilder, error)
func (*MetadataBuilder) AppendMetadataLog ¶ added in v0.2.0
func (b *MetadataBuilder) AppendMetadataLog(entry MetadataLogEntry) *MetadataBuilder
func (*MetadataBuilder) Build ¶ added in v0.2.0
func (b *MetadataBuilder) Build() (Metadata, error)
func (*MetadataBuilder) CurrentSchema ¶ added in v0.2.0
func (b *MetadataBuilder) CurrentSchema() *iceberg.Schema
func (*MetadataBuilder) CurrentSpec ¶ added in v0.2.0
func (b *MetadataBuilder) CurrentSpec() iceberg.PartitionSpec
func (*MetadataBuilder) GetSchemaByID ¶ added in v0.2.0
func (b *MetadataBuilder) GetSchemaByID(id int) (*iceberg.Schema, error)
func (*MetadataBuilder) GetSortOrderByID ¶ added in v0.2.0
func (b *MetadataBuilder) GetSortOrderByID(id int) (*SortOrder, error)
func (*MetadataBuilder) GetSpecByID ¶ added in v0.2.0
func (b *MetadataBuilder) GetSpecByID(id int) (*iceberg.PartitionSpec, error)
func (*MetadataBuilder) HasChanges ¶ added in v0.2.0
func (b *MetadataBuilder) HasChanges() bool
func (*MetadataBuilder) LastUpdatedMS ¶ added in v0.2.0
func (b *MetadataBuilder) LastUpdatedMS() int64
func (*MetadataBuilder) NameMapping ¶ added in v0.2.0
func (b *MetadataBuilder) NameMapping() iceberg.NameMapping
func (*MetadataBuilder) RemoveProperties ¶ added in v0.2.0
func (b *MetadataBuilder) RemoveProperties(keys []string) (*MetadataBuilder, error)
func (*MetadataBuilder) SetCurrentSchemaID ¶ added in v0.2.0
func (b *MetadataBuilder) SetCurrentSchemaID(currentSchemaID int) (*MetadataBuilder, error)
func (*MetadataBuilder) SetDefaultSortOrderID ¶ added in v0.2.0
func (b *MetadataBuilder) SetDefaultSortOrderID(defaultSortOrderID int) (*MetadataBuilder, error)
func (*MetadataBuilder) SetDefaultSpecID ¶ added in v0.2.0
func (b *MetadataBuilder) SetDefaultSpecID(defaultSpecID int) (*MetadataBuilder, error)
func (*MetadataBuilder) SetFormatVersion ¶ added in v0.2.0
func (b *MetadataBuilder) SetFormatVersion(formatVersion int) (*MetadataBuilder, error)
func (*MetadataBuilder) SetLastUpdatedMS ¶ added in v0.2.0
func (b *MetadataBuilder) SetLastUpdatedMS() *MetadataBuilder
func (*MetadataBuilder) SetLoc ¶ added in v0.2.0
func (b *MetadataBuilder) SetLoc(loc string) (*MetadataBuilder, error)
func (*MetadataBuilder) SetProperties ¶ added in v0.2.0
func (b *MetadataBuilder) SetProperties(props iceberg.Properties) (*MetadataBuilder, error)
func (*MetadataBuilder) SetSnapshotRef ¶ added in v0.2.0
func (b *MetadataBuilder) SetSnapshotRef( name string, snapshotID int64, refType RefType, options ...setSnapshotRefOption, ) (*MetadataBuilder, error)
func (*MetadataBuilder) SetUUID ¶ added in v0.2.0
func (b *MetadataBuilder) SetUUID(uuid uuid.UUID) (*MetadataBuilder, error)
func (*MetadataBuilder) SnapshotByID ¶ added in v0.2.0
func (b *MetadataBuilder) SnapshotByID(id int64) (*Snapshot, error)
func (*MetadataBuilder) TrimMetadataLogs ¶ added in v0.2.0
func (b *MetadataBuilder) TrimMetadataLogs(maxEntries int) *MetadataBuilder
type MetadataLogEntry ¶
type Operation ¶
type Operation string
func ValidOperation ¶
ValidOperation ensures that a given string is one of the valid operation types: append, replace, overwrite, delete.
type Requirement ¶ added in v0.2.0
type Requirement interface {
// Validate checks that the current table metadata satisfies the requirement.
Validate(Metadata) error
GetType() string
}
A Requirement is a validation rule that must be satisfied before attempting to make and commit changes to a table. Requirements are used to ensure that the table is in a valid state before making changes.
func AssertCreate ¶ added in v0.2.0
func AssertCreate() Requirement
AssertCreate creates a requirement that the table does not already exist.
func AssertCurrentSchemaID ¶ added in v0.2.0
func AssertCurrentSchemaID(id int) Requirement
AssertCurrentSchemaID creates a requirement that the table's current schema ID matches the given id.
func AssertDefaultSortOrderID ¶ added in v0.2.0
func AssertDefaultSortOrderID(id int) Requirement
AssertDefaultSortOrderID creates a requirement that the table's default sort order ID matches the given id.
func AssertDefaultSpecID ¶ added in v0.2.0
func AssertDefaultSpecID(id int) Requirement
AssertDefaultSpecID creates a requirement that the table's default partition spec ID matches the given id.
func AssertLastAssignedFieldID ¶ added in v0.2.0
func AssertLastAssignedFieldID(id int) Requirement
AssertLastAssignedFieldID validates that the table's last assigned column ID matches the given id.
func AssertLastAssignedPartitionID ¶ added in v0.2.0
func AssertLastAssignedPartitionID(id int) Requirement
AssertLastAssignedPartitionID creates a requirement that the table's last assigned partition ID matches the given id.
func AssertRefSnapshotID ¶ added in v0.2.0
func AssertRefSnapshotID(ref string, id *int64) Requirement
AssertRefSnapshotID creates a requirement which ensures that the table branch or tag identified by the given ref must reference the given snapshot id. If the id is nil, the ref must not already exist.
func AssertTableUUID ¶ added in v0.2.0
func AssertTableUUID(uuid uuid.UUID) Requirement
AssertTableUUID creates a requirement that the table UUID matches the given UUID.
type Scan ¶
type Scan struct {
// contains filtered or unexported fields
}
func (*Scan) PlanFiles ¶
func (scan *Scan) PlanFiles(ctx context.Context) ([]FileScanTask, error)
PlanFiles orchestrates the fetching and filtering of manifests, and then building a list of FileScanTasks that match the current Scan criteria.
func (*Scan) ToArrowRecords ¶
func (scan *Scan) ToArrowRecords(ctx context.Context) (*arrow.Schema, iter.Seq2[arrow.Record, error], error)
ToArrowRecords returns the arrow schema of the expected records and an iterator that can be used with a range expression to read the records as they are available. If an error is encountered during the planning and setup, then this will return the error directly. If the error occurs while iterating the records, it will be returned by the iterator.
The purpose for returning the schema up front is to handle the case where there are no rows returned. The resulting Arrow Schema of the projection will still be known.
func (*Scan) ToArrowTable ¶
ToArrowTable calls ToArrowRecords and then gathers all of the records together and returns an arrow.Table made from those records.
func (*Scan) UseRowLimit ¶
type ScanOption ¶
type ScanOption func(*Scan)
func WitMaxConcurrency ¶
func WitMaxConcurrency(n int) ScanOption
WitMaxConcurrency sets the maximum concurrency for table scan and plan operations. When unset it defaults to runtime.GOMAXPROCS.
func WithCaseSensitive ¶
func WithCaseSensitive(b bool) ScanOption
func WithLimit ¶
func WithLimit(n int64) ScanOption
func WithOptions ¶
func WithOptions(opts iceberg.Properties) ScanOption
func WithRowFilter ¶
func WithRowFilter(e iceberg.BooleanExpression) ScanOption
func WithSelectedFields ¶
func WithSelectedFields(fields ...string) ScanOption
func WithSnapshotID ¶
func WithSnapshotID(n int64) ScanOption
type Snapshot ¶
type Snapshot struct {
SnapshotID int64 `json:"snapshot-id"`
ParentSnapshotID *int64 `json:"parent-snapshot-id,omitempty"`
SequenceNumber int64 `json:"sequence-number"`
TimestampMs int64 `json:"timestamp-ms"`
ManifestList string `json:"manifest-list,omitempty"`
Summary *Summary `json:"summary,omitempty"`
SchemaID *int `json:"schema-id,omitempty"`
}
type SnapshotLogEntry ¶
type SnapshotRef ¶
type SnapshotRef struct {
SnapshotID int64 `json:"snapshot-id"`
SnapshotRefType RefType `json:"type"`
MinSnapshotsToKeep *int `json:"min-snapshots-to-keep,omitempty"`
MaxSnapshotAgeMs *int64 `json:"max-snapshot-age-ms,omitempty"`
MaxRefAgeMs *int64 `json:"max-ref-age-ms,omitempty"`
}
SnapshotRef represents the reference information for a specific snapshot
func (*SnapshotRef) Equals ¶
func (s *SnapshotRef) Equals(rhs SnapshotRef) bool
func (*SnapshotRef) UnmarshalJSON ¶
func (s *SnapshotRef) UnmarshalJSON(b []byte) error
type SnapshotSummaryCollector ¶ added in v0.2.0
type SnapshotSummaryCollector struct {
// contains filtered or unexported fields
}
type SortDirection ¶
type SortDirection string
const ( SortASC SortDirection = "asc" SortDESC SortDirection = "desc" )
type SortField ¶
type SortField struct {
// SourceID is the source column id from the table's schema
SourceID int `json:"source-id"`
// Transform is the transformation used to produce values to be
// sorted on from the source column.
Transform iceberg.Transform `json:"transform"`
// Direction is an enum indicating ascending or descending direction.
Direction SortDirection `json:"direction"`
// NullOrder describes the order of null values when sorting
// should be only either nulls-first or nulls-last enum values.
NullOrder NullOrder `json:"null-order"`
}
SortField describes a field used in a sort order definition.
func (*SortField) MarshalJSON ¶
func (*SortField) UnmarshalJSON ¶
type SortOrder ¶
SortOrder describes how the data is sorted within the table.
Data can be sorted within partitions by columns to gain performance. The order of the sort fields within the list defines the order in which the sort is applied to the data.
func AssignFreshSortOrderIDs ¶ added in v0.2.0
AssignFreshSortOrderIDs updates and reassigns the field source IDs from the old schema to the corresponding fields in the fresh schema, while also giving the Sort Order the initial Sort Order ID (InitialSortOrderID).
func AssignFreshSortOrderIDsWithID ¶ added in v0.2.0
func AssignFreshSortOrderIDsWithID(sortOrder SortOrder, old, fresh *iceberg.Schema, sortOrderID int) (SortOrder, error)
AssignFreshSortOrderIDsWithID is like AssignFreshSortOrderIDs but allows specifying the id of the returned SortOrder.
func (*SortOrder) UnmarshalJSON ¶
type StagedTable ¶ added in v0.2.0
type StagedTable struct {
*Table
}
func (*StagedTable) Refresh ¶ added in v0.2.0
func (s *StagedTable) Refresh(ctx context.Context) (*Table, error)
func (*StagedTable) Scan ¶ added in v0.2.0
func (s *StagedTable) Scan(opts ...ScanOption) *Scan
type Summary ¶
type Summary struct {
Operation Operation
Properties iceberg.Properties
}
Summary stores the summary information for a snapshot indicating the operation that created the snapshot, and various properties which might exist in the summary.
func (*Summary) MarshalJSON ¶
func (*Summary) UnmarshalJSON ¶
type Table ¶
type Table struct {
// contains filtered or unexported fields
}
func NewFromLocation ¶
func (Table) AllManifests ¶ added in v0.2.0
func (Table) CurrentSnapshot ¶
func (Table) Identifier ¶
func (t Table) Identifier() Identifier
func (Table) LocationProvider ¶ added in v0.2.0
func (t Table) LocationProvider() (LocationProvider, error)
func (Table) MetadataLocation ¶
func (Table) NameMapping ¶ added in v0.2.0
func (t Table) NameMapping() iceberg.NameMapping
func (Table) NewTransaction ¶ added in v0.2.0
func (t Table) NewTransaction() *Transaction
func (Table) Properties ¶
func (t Table) Properties() iceberg.Properties
func (Table) Scan ¶
func (t Table) Scan(opts ...ScanOption) *Scan
func (Table) SnapshotByID ¶
func (Table) SnapshotByName ¶
func (Table) Spec ¶
func (t Table) Spec() iceberg.PartitionSpec
type Transaction ¶ added in v0.2.0
type Transaction struct {
// contains filtered or unexported fields
}
func (*Transaction) AddFiles ¶ added in v0.2.0
func (t *Transaction) AddFiles(files []string, snapshotProps iceberg.Properties, ignoreDuplicates bool) error
func (*Transaction) Append ¶ added in v0.2.0
func (t *Transaction) Append(rdr array.RecordReader, snapshotProps iceberg.Properties) error
func (*Transaction) Commit ¶ added in v0.2.0
func (t *Transaction) Commit(ctx context.Context) (*Table, error)
func (*Transaction) Scan ¶ added in v0.2.0
func (t *Transaction) Scan(opts ...ScanOption) (*Scan, error)
func (*Transaction) SetProperties ¶ added in v0.2.0
func (t *Transaction) SetProperties(props iceberg.Properties) error
func (*Transaction) StagedTable ¶ added in v0.2.0
func (t *Transaction) StagedTable() (*StagedTable, error)
type Update ¶ added in v0.2.0
type Update interface {
// Action returns the name of the action that the update represents.
Action() string
// Apply applies the update to the given metadata builder.
Apply(*MetadataBuilder) error
}
Update represents a change to a table's metadata.
func NewAddPartitionSpecUpdate ¶ added in v0.2.0
func NewAddPartitionSpecUpdate(spec *iceberg.PartitionSpec, initial bool) Update
NewAddPartitionSpecUpdate creates a new update that adds the given partition spec to the table metadata. If the initial flag is set to true, the spec is considered the initial spec of the table, and all other previously added specs in the metadata builder are removed.
func NewAddSchemaUpdate ¶ added in v0.2.0
NewAddSchemaUpdate creates a new update that adds the given schema and last column ID to the table metadata. If the initial flag is set to true, the schema is considered the initial schema of the table, and all previously added schemas in the metadata builder are removed.
func NewAddSnapshotUpdate ¶ added in v0.2.0
NewAddSnapshotUpdate creates a new update that adds the given snapshot to the table metadata.
func NewAddSortOrderUpdate ¶ added in v0.2.0
NewAddSortOrderUpdate creates a new update that adds the given sort order to the table metadata. If the initial flag is set to true, the sort order is considered the initial sort order of the table, and all previously added sort orders in the metadata builder are removed.
func NewAssignUUIDUpdate ¶ added in v0.2.0
NewAssignUUIDUpdate creates a new update to assign a UUID to the table metadata.
func NewRemovePropertiesUpdate ¶ added in v0.2.0
NewRemovePropertiesUpdate creates a new update that removes properties from the table metadata. The properties are identified by their names, and if a property with the given name does not exist, it is ignored.
func NewRemoveSnapshotsUpdate ¶ added in v0.2.0
NewRemoveSnapshotsUpdate creates a new update that removes all snapshots from the table metadata with the given snapshot IDs.
func NewSetCurrentSchemaUpdate ¶ added in v0.2.0
NewSetCurrentSchemaUpdate creates a new update that sets the current schema of the table metadata to the given schema ID.
func NewSetDefaultSortOrderUpdate ¶ added in v0.2.0
NewSetDefaultSortOrderUpdate creates a new update that sets the default sort order of the table metadata to the given sort order ID.
func NewSetDefaultSpecUpdate ¶ added in v0.2.0
NewSetDefaultSpecUpdate creates a new update that sets the default partition spec of the table metadata to the given spec ID.
func NewSetLocationUpdate ¶ added in v0.2.0
NewSetLocationUpdate creates a new update that sets the location of the table metadata.
func NewSetSnapshotRefUpdate ¶ added in v0.2.0
func NewSetSnapshotRefUpdate( name string, snapshotID int64, refType RefType, maxRefAgeMs, maxSnapshotAgeMs int64, minSnapshotsToKeep int, ) Update
NewSetSnapshotRefUpdate creates a new update that sets the given snapshot reference as the current snapshot of the table metadata. MaxRefAgeMs, MaxSnapshotAgeMs, and MinSnapshotsToKeep are optional, and any non-positive values are ignored.
func NewUpgradeFormatVersionUpdate ¶ added in v0.2.0
NewUpgradeFormatVersionUpdate creates a new update that upgrades the format version of the table metadata to the given formatVersion.