Documentation
¶
Overview ¶
Package extensions provides implementations of Arrow canonical extension types as defined in the Arrow specification. See https://arrow.apache.org/docs/format/CanonicalExtensions.html.
Index ¶
- type Bool8Array
- type Bool8Builder
- func (b *Bool8Builder) Append(v bool)
- func (b *Bool8Builder) AppendValueFromString(s string) error
- func (b *Bool8Builder) AppendValues(v []bool, valid []bool)
- func (b *Bool8Builder) Unmarshal(dec *json.Decoder) error
- func (b *Bool8Builder) UnmarshalOne(dec *json.Decoder) error
- func (b *Bool8Builder) UnsafeAppend(v bool)
- type Bool8Type
- func (b *Bool8Type) ArrayType() reflect.Type
- func (b *Bool8Type) Deserialize(storageType arrow.DataType, data string) (arrow.ExtensionType, error)
- func (b *Bool8Type) ExtensionEquals(other arrow.ExtensionType) bool
- func (b *Bool8Type) ExtensionName() string
- func (*Bool8Type) NewBuilder(mem memory.Allocator) array.Builder
- func (b *Bool8Type) Serialize() string
- func (b *Bool8Type) String() string
- type JSONArray
- func (a *JSONArray) GetOneForMarshal(i int) interface{}
- func (a *JSONArray) MarshalJSON() ([]byte, error)
- func (a *JSONArray) String() string
- func (a *JSONArray) Value(i int) any
- func (a *JSONArray) ValueBytes(i int) []byte
- func (a *JSONArray) ValueJSON(i int) json.RawMessage
- func (a *JSONArray) ValueStr(i int) string
- type JSONType
- func (b *JSONType) ArrayType() reflect.Type
- func (b *JSONType) Deserialize(storageType arrow.DataType, data string) (arrow.ExtensionType, error)
- func (b *JSONType) ExtensionEquals(other arrow.ExtensionType) bool
- func (b *JSONType) ExtensionName() string
- func (b *JSONType) ParquetLogicalType() schema.LogicalType
- func (b *JSONType) Serialize() string
- func (b *JSONType) String() string
- type OpaqueArray
- type OpaqueType
- func (*OpaqueType) ArrayType() reflect.Type
- func (*OpaqueType) Deserialize(storageType arrow.DataType, data string) (arrow.ExtensionType, error)
- func (o *OpaqueType) ExtensionEquals(other arrow.ExtensionType) bool
- func (*OpaqueType) ExtensionName() string
- func (o *OpaqueType) Serialize() string
- func (o *OpaqueType) String() string
- type UUIDArray
- type UUIDBuilder
- func (b *UUIDBuilder) Append(v uuid.UUID)
- func (b *UUIDBuilder) AppendBytes(v [16]byte)
- func (b *UUIDBuilder) AppendValueFromString(s string) error
- func (b *UUIDBuilder) AppendValues(v []uuid.UUID, valid []bool)
- func (b *UUIDBuilder) Unmarshal(dec *json.Decoder) error
- func (b *UUIDBuilder) UnmarshalJSON(data []byte) error
- func (b *UUIDBuilder) UnmarshalOne(dec *json.Decoder) error
- func (b *UUIDBuilder) UnsafeAppend(v uuid.UUID)
- type UUIDType
- func (*UUIDType) ArrayType() reflect.Type
- func (*UUIDType) BitWidth() int
- func (*UUIDType) Bytes() int
- func (*UUIDType) Deserialize(storageType arrow.DataType, data string) (arrow.ExtensionType, error)
- func (e *UUIDType) ExtensionEquals(other arrow.ExtensionType) bool
- func (*UUIDType) ExtensionName() string
- func (e *UUIDType) MarshalJSON() ([]byte, error)
- func (*UUIDType) NewBuilder(mem memory.Allocator) array.Builder
- func (e *UUIDType) ParquetLogicalType() schema.LogicalType
- func (*UUIDType) Serialize() string
- func (e *UUIDType) String() string
- type VariantArray
- func (v *VariantArray) GetOneForMarshal(i int) any
- func (v *VariantArray) IsNull(i int) bool
- func (v *VariantArray) IsShredded() bool
- func (v *VariantArray) IsValid(i int) bool
- func (v *VariantArray) MarshalJSON() ([]byte, error)
- func (v *VariantArray) Metadata() arrow.TypedArray[[]byte]
- func (v *VariantArray) Shredded() arrow.Array
- func (v *VariantArray) String() string
- func (v *VariantArray) UntypedValues() arrow.TypedArray[[]byte]
- func (v *VariantArray) Value(i int) (variant.Value, error)
- func (v *VariantArray) ValueStr(i int) string
- func (v *VariantArray) Values() ([]variant.Value, error)
- type VariantBuilder
- type VariantType
- func (*VariantType) ArrayType() reflect.Type
- func (*VariantType) Deserialize(storageType arrow.DataType, _ string) (arrow.ExtensionType, error)
- func (v *VariantType) ExtensionEquals(other arrow.ExtensionType) bool
- func (*VariantType) ExtensionName() string
- func (v *VariantType) Metadata() arrow.Field
- func (v *VariantType) NewBuilder(mem memory.Allocator) array.Builder
- func (*VariantType) ParquetLogicalType() schema.LogicalType
- func (*VariantType) Serialize() string
- func (v *VariantType) String() string
- func (v *VariantType) TypedValue() arrow.Field
- func (v *VariantType) Value() arrow.Field
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Bool8Array ¶
type Bool8Array struct {
array.ExtensionArrayBase
}
Bool8Array is logically an array of boolean values but uses 8 bits to store values instead of 1 bit as in the native BooleanArray.
func (*Bool8Array) BoolValues ¶
func (a *Bool8Array) BoolValues() []bool
func (*Bool8Array) GetOneForMarshal ¶
func (a *Bool8Array) GetOneForMarshal(i int) interface{}
func (*Bool8Array) MarshalJSON ¶
func (a *Bool8Array) MarshalJSON() ([]byte, error)
func (*Bool8Array) String ¶
func (a *Bool8Array) String() string
func (*Bool8Array) Value ¶
func (a *Bool8Array) Value(i int) bool
func (*Bool8Array) ValueStr ¶
func (a *Bool8Array) ValueStr(i int) string
type Bool8Builder ¶
type Bool8Builder struct {
*array.ExtensionBuilder
}
Bool8Builder is a convenience builder for the Bool8 extension type, allowing arrays to be built with boolean values rather than the underlying storage type.
func NewBool8Builder ¶
func NewBool8Builder(mem memory.Allocator) *Bool8Builder
NewBool8Builder creates a new Bool8Builder, exposing a convenient and efficient interface for writing boolean values to the underlying int8 storage array.
func (*Bool8Builder) Append ¶
func (b *Bool8Builder) Append(v bool)
func (*Bool8Builder) AppendValueFromString ¶
func (b *Bool8Builder) AppendValueFromString(s string) error
func (*Bool8Builder) AppendValues ¶
func (b *Bool8Builder) AppendValues(v []bool, valid []bool)
func (*Bool8Builder) UnmarshalOne ¶
func (b *Bool8Builder) UnmarshalOne(dec *json.Decoder) error
func (*Bool8Builder) UnsafeAppend ¶
func (b *Bool8Builder) UnsafeAppend(v bool)
type Bool8Type ¶
type Bool8Type struct {
arrow.ExtensionBase
}
Bool8Type represents a logical boolean that is stored using 8 bits.
func NewBool8Type ¶
func NewBool8Type() *Bool8Type
NewBool8Type creates a new Bool8Type with the underlying storage type set correctly to Int8.
func (*Bool8Type) Deserialize ¶
func (*Bool8Type) ExtensionEquals ¶
func (b *Bool8Type) ExtensionEquals(other arrow.ExtensionType) bool
func (*Bool8Type) ExtensionName ¶
type JSONArray ¶
type JSONArray struct {
array.ExtensionArrayBase
}
JSONArray is logically an array of UTF-8 encoded JSON strings. Its values are unmarshaled to native Go values.
func (*JSONArray) GetOneForMarshal ¶
GetOneForMarshal implements arrow.Array.
func (*JSONArray) MarshalJSON ¶
MarshalJSON implements json.Marshaler. Marshaling json.RawMessage is a no-op, except that nil values will be marshaled as a JSON null.
func (*JSONArray) ValueBytes ¶
type JSONType ¶
type JSONType struct {
arrow.ExtensionBase
}
JSONType represents a UTF-8 encoded JSON string as specified in RFC8259.
func NewJSONType ¶
NewJSONType creates a new JSONType with the specified storage type. storageType must be one of String, LargeString, StringView.
func (*JSONType) Deserialize ¶
func (*JSONType) ExtensionEquals ¶
func (b *JSONType) ExtensionEquals(other arrow.ExtensionType) bool
func (*JSONType) ExtensionName ¶
func (*JSONType) ParquetLogicalType ¶
func (b *JSONType) ParquetLogicalType() schema.LogicalType
ParquetLogicalType implements pqarrow.ExtensionCustomParquetType.
type OpaqueArray ¶
type OpaqueArray struct {
array.ExtensionArrayBase
}
OpaqueArray is a placeholder for data from an external (usually non-Arrow) system that could not be interpreted.
type OpaqueType ¶
type OpaqueType struct {
arrow.ExtensionBase `json:"-"`
TypeName string `json:"type_name"`
VendorName string `json:"vendor_name"`
}
OpaqueType is a placeholder for a type from an external (usually non-Arrow) system that could not be interpreted.
func NewOpaqueType ¶
func NewOpaqueType(storageType arrow.DataType, name, vendorName string) *OpaqueType
NewOpaqueType creates a new OpaqueType with the provided storage type, type name, and vendor name.
func (*OpaqueType) ArrayType ¶
func (*OpaqueType) ArrayType() reflect.Type
func (*OpaqueType) Deserialize ¶
func (*OpaqueType) Deserialize(storageType arrow.DataType, data string) (arrow.ExtensionType, error)
func (*OpaqueType) ExtensionEquals ¶
func (o *OpaqueType) ExtensionEquals(other arrow.ExtensionType) bool
func (*OpaqueType) ExtensionName ¶
func (*OpaqueType) ExtensionName() string
func (*OpaqueType) Serialize ¶
func (o *OpaqueType) Serialize() string
func (*OpaqueType) String ¶
func (o *OpaqueType) String() string
type UUIDArray ¶
type UUIDArray struct {
array.ExtensionArrayBase
}
UUIDArray is a simple array backed by FixedSizeBinary(16) storage.
func (*UUIDArray) GetOneForMarshal ¶
func (*UUIDArray) MarshalJSON ¶
type UUIDBuilder ¶
type UUIDBuilder struct {
*array.ExtensionBuilder
}
func NewUUIDBuilder ¶
func NewUUIDBuilder(mem memory.Allocator) *UUIDBuilder
NewUUIDBuilder creates a new UUIDBuilder, exposing a convenient and efficient interface for writing uuid.UUID (or [16]byte) values to the underlying FixedSizeBinary storage array.
func (*UUIDBuilder) Append ¶
func (b *UUIDBuilder) Append(v uuid.UUID)
func (*UUIDBuilder) AppendBytes ¶
func (b *UUIDBuilder) AppendBytes(v [16]byte)
func (*UUIDBuilder) AppendValueFromString ¶
func (b *UUIDBuilder) AppendValueFromString(s string) error
func (*UUIDBuilder) AppendValues ¶
func (b *UUIDBuilder) AppendValues(v []uuid.UUID, valid []bool)
func (*UUIDBuilder) UnmarshalJSON ¶
func (b *UUIDBuilder) UnmarshalJSON(data []byte) error
func (*UUIDBuilder) UnmarshalOne ¶
func (b *UUIDBuilder) UnmarshalOne(dec *json.Decoder) error
func (*UUIDBuilder) UnsafeAppend ¶
func (b *UUIDBuilder) UnsafeAppend(v uuid.UUID)
type UUIDType ¶
type UUIDType struct {
arrow.ExtensionBase
}
UUIDType is a simple extension type, stored as FixedSizeBinary(16), for representing UUIDs.
func NewUUIDType ¶
func NewUUIDType() *UUIDType
NewUUIDType is a convenience function to create an instance of UUIDType with the correct storage type
func (*UUIDType) Deserialize ¶
Deserialize expects storageType to be FixedSizeBinaryType{ByteWidth: 16}
func (*UUIDType) ExtensionEquals ¶
func (e *UUIDType) ExtensionEquals(other arrow.ExtensionType) bool
ExtensionEquals returns true if both extensions have the same name
func (*UUIDType) ExtensionName ¶
func (*UUIDType) MarshalJSON ¶
func (*UUIDType) ParquetLogicalType ¶
func (e *UUIDType) ParquetLogicalType() schema.LogicalType
ParquetLogicalType implements pqarrow.ExtensionCustomParquetType.
type VariantArray ¶ added in v18.4.0
type VariantArray struct {
array.ExtensionArrayBase
// contains filtered or unexported fields
}
VariantArray is an extension Array type containing Variant values which may potentially be shredded into multiple fields.
func (*VariantArray) GetOneForMarshal ¶ added in v18.4.0
func (v *VariantArray) GetOneForMarshal(i int) any
func (*VariantArray) IsNull ¶ added in v18.4.0
func (v *VariantArray) IsNull(i int) bool
IsNull will also take into account the special case where there is an encoded null variant in the untyped values array for this index and return appropriately.
func (*VariantArray) IsShredded ¶ added in v18.4.0
func (v *VariantArray) IsShredded() bool
IsShredded returns true if the variant has shredded columns.
func (*VariantArray) IsValid ¶ added in v18.4.0
func (v *VariantArray) IsValid(i int) bool
func (*VariantArray) MarshalJSON ¶ added in v18.4.0
func (v *VariantArray) MarshalJSON() ([]byte, error)
func (*VariantArray) Metadata ¶ added in v18.4.0
func (v *VariantArray) Metadata() arrow.TypedArray[[]byte]
Metadata returns the metadata column of the variant array, containing the metadata for each variant value.
func (*VariantArray) Shredded ¶ added in v18.4.0
func (v *VariantArray) Shredded() arrow.Array
Shredded returns the typed array for the shredded values of the variant array, following the rules of the Parquet Variant specification. As such, this array will always be either a struct, a list, or a primitive array.
The reason for exposing this is to allow users to quickly access one of the shredded fields without having to decode the entire variant value.
func (*VariantArray) String ¶ added in v18.4.0
func (v *VariantArray) String() string
func (*VariantArray) UntypedValues ¶ added in v18.4.0
func (v *VariantArray) UntypedValues() arrow.TypedArray[[]byte]
UntypedValues returns the untyped variant values for each element of the array. If the array is not shredded, this will contain the variant bytes for each value. If the array is shredded, this will contain any variant values that are either partially shredded objects or are not shredded at all (e.g. a value that doesn't match the types of the shredding).
The shredded array and the untyped values array together are used to encode a single value. If this is not encoding shredded object fields, then a given index will never be null in both arrays. (A null value will be an encoded null variant value in this array with a null in the shredded array).
If both arrays are null for a given index (only valid for shredded object fields), it means that the value is missing entirely (as opposed to existing and having a value of null).
func (*VariantArray) Value ¶ added in v18.4.0
func (v *VariantArray) Value(i int) (variant.Value, error)
func (*VariantArray) ValueStr ¶ added in v18.4.0
func (v *VariantArray) ValueStr(i int) string
type VariantBuilder ¶ added in v18.4.0
type VariantBuilder struct {
*array.ExtensionBuilder
// contains filtered or unexported fields
}
VariantBuilder is an array builder for both shredded and non-shredded variant extension arrays. It allows you to append variant values, and will appropriately shred them if it is able to do so based on the underlying storage type.
func NewVariantBuilder ¶ added in v18.4.0
func NewVariantBuilder(mem memory.Allocator, dt *VariantType) *VariantBuilder
NewVariantBuilder creates a new VariantBuilder for the given variant type which may or may not be shredded.
func (*VariantBuilder) Append ¶ added in v18.4.0
func (b *VariantBuilder) Append(v variant.Value)
func (*VariantBuilder) Unmarshal ¶ added in v18.4.0
func (b *VariantBuilder) Unmarshal(dec *json.Decoder) error
func (*VariantBuilder) UnmarshalJSON ¶ added in v18.4.0
func (b *VariantBuilder) UnmarshalJSON(data []byte) error
func (*VariantBuilder) UnmarshalOne ¶ added in v18.4.0
func (b *VariantBuilder) UnmarshalOne(dec *json.Decoder) error
type VariantType ¶ added in v18.4.0
type VariantType struct {
arrow.ExtensionBase
// contains filtered or unexported fields
}
VariantType is the arrow extension type for representing Variant values as defined by the Parquet Variant specification for encoding and shredding values. The underlying storage must be a struct type with a minimum of two fields ("metadata" and "value") and an optional third field ("typed_value").
See the documentation for NewVariantType for the rules for creating a variant type.
func NewDefaultVariantType ¶ added in v18.4.0
func NewDefaultVariantType() *VariantType
NewDefaultVariantType creates a basic, non-shredded variant type. The underlying storage type will be struct<metadata: binary non-null, value: binary non-null>.
func NewShreddedVariantType ¶ added in v18.4.0
func NewShreddedVariantType(dt arrow.DataType) *VariantType
NewShreddedVariantType creates a new VariantType extension type using the provided type to define a shredded schema by setting the `typed_value` field accordingly and properly constructing the shredded fields for structs, lists and so on.
For example:
NewShreddedVariantType(arrow.StructOf(
arrow.Field{Name: "latitude", Type: arrow.PrimitiveTypes.Float64},
arrow.Field{Name: "longitude", Type: arrow.PrimitiveTypes.Float32}))
Will create a variant type with the following structure:
arrow.StructOf(
arrow.Field{Name: "metadata", Type: arrow.BinaryTypes.Binary, Nullable: false},
arrow.Field{Name: "value", Type: arrow.BinaryTypes.Binary, Nullable: true},
arrow.Field{Name: "typed_value", Type: arrow.StructOf(
arrow.Field{Name: "latitude", Type: arrow.StructOf(
arrow.Field{Name: "value", Type: arrow.BinaryTypes.Binary, Nullable: true},
arrow.Field{Name: "typed_value", Type: arrow.PrimitiveTypes.Float64, Nullable: true}),
Nullable: false},
arrow.Field{Name: "longitude", Type: arrow.StructOf(
arrow.Field{Name: "value", Type: arrow.BinaryTypes.Binary, Nullable: true},
arrow.Field{Name: "typed_value", Type: arrow.PrimitiveTypes.Float32, Nullable: true}),
Nullable: false},
), Nullable: true})
This is intended to be a convenient way to create a shredded variant type from a definition of the fields to shred. If the provided data type is nil, it will create a default variant type.
func NewVariantType ¶ added in v18.4.0
func NewVariantType(storage arrow.DataType) (*VariantType, error)
NewVariantType creates a new variant type based on the provided storage type.
The rules for a variant storage type are:
- MUST be a struct
- MUST have a non-nullable field named "metadata" that is binary/largebinary/binary_view
- Must satisfy exactly one of the following: a. MUST have a non-nullable field named "value" that is binary/largebinary/binary_view b. MUST have a nullable field named "value" that is binary/largebinary/binary_view and another nullable field named "typed_value" that is either a primitive type or a list/large_list/list_view or struct which also satisfies the following requirements: i. The elements must be NON-NULLABLE ii. There must either be a single NON-NULLABLE field named "value" which is binary/largebinary/binary_view, or there must be a nullable "value" field and a nullable "typed_value" field that follow the rules laid out in (b).
The metadata field may also be dictionary-encoded.
func (*VariantType) ArrayType ¶ added in v18.4.0
func (*VariantType) ArrayType() reflect.Type
func (*VariantType) Deserialize ¶ added in v18.4.0
func (*VariantType) Deserialize(storageType arrow.DataType, _ string) (arrow.ExtensionType, error)
func (*VariantType) ExtensionEquals ¶ added in v18.4.0
func (v *VariantType) ExtensionEquals(other arrow.ExtensionType) bool
func (*VariantType) ExtensionName ¶ added in v18.4.0
func (*VariantType) ExtensionName() string
func (*VariantType) Metadata ¶ added in v18.4.0
func (v *VariantType) Metadata() arrow.Field
func (*VariantType) NewBuilder ¶ added in v18.4.0
func (v *VariantType) NewBuilder(mem memory.Allocator) array.Builder
func (*VariantType) ParquetLogicalType ¶ added in v18.4.0
func (*VariantType) ParquetLogicalType() schema.LogicalType
func (*VariantType) Serialize ¶ added in v18.4.0
func (*VariantType) Serialize() string
func (*VariantType) String ¶ added in v18.4.0
func (v *VariantType) String() string
func (*VariantType) TypedValue ¶ added in v18.4.0
func (v *VariantType) TypedValue() arrow.Field
func (*VariantType) Value ¶ added in v18.4.0
func (v *VariantType) Value() arrow.Field