Documentation
¶
Overview ¶
Package dataframe provides a two-dimensional labeled data structure together with operations on it.
DataFrame is the primary data structure in GopherData, analogous to pandas.DataFrame in Python or data.frame in R. It consists of:
- Multiple Series (columns) with potentially different types
- A shared Index for row labels
- Copy-on-write semantics for predictable behavior
Key features:
- Type-safe generic Series under the hood
- Efficient null handling via bit-packed masks
- Thread-safe concurrent reads
- Rich selection and filtering operations
- Automatic parallelism for expensive operations
Example:
	df, err := dataframe.New(map[string]any{
		"name": []string{"Alice", "Bob", "Charlie"},
		"age":  []int64{25, 30, 35},
	})
	if err != nil {
		log.Fatal(err)
	}
	subset := df.Select("name", "age")
	filtered := subset.Filter(func(r *dataframe.Row) bool {
		age, _ := r.Get("age")
		return age.(int64) > 25
	})
Index ¶
- Constants
- type AggOption
- type AggOptions
- type DataFrame
- func (df *DataFrame) Apply(fn func(*Row) any, resultCol string) *DataFrame
- func (df *DataFrame) ApplyColumn(col string, fn func(any) any) *DataFrame
- func (df *DataFrame) ApplyElement(cols []string, fn func(map[string]any) map[string]any) *DataFrame
- func (df *DataFrame) Argsort(col string, order core.Order, opts ...SortOption) []int
- func (df *DataFrame) Column(name string) (*series.Series[any], error)
- func (df *DataFrame) Columns() []string
- func (df *DataFrame) Copy() *DataFrame
- func (df *DataFrame) Count(cols ...string) (map[string]int, error)
- func (df *DataFrame) Describe() (*DataFrame, error)
- func (df *DataFrame) Drop(cols ...string) *DataFrame
- func (df *DataFrame) DropNA(opts ...DropNAOption) *DataFrame
- func (df *DataFrame) EWM(alpha float64) *Window
- func (df *DataFrame) Empty() bool
- func (df *DataFrame) Expanding(minPeriods int) *Window
- func (df *DataFrame) FillNA(value any) *DataFrame
- func (df *DataFrame) FillNAColumn(col string, value any) *DataFrame
- func (df *DataFrame) FillNADict(values map[string]any) *DataFrame
- func (df *DataFrame) Filter(fn func(*Row) bool) *DataFrame
- func (df *DataFrame) GroupBy(cols ...string) (*GroupBy, error)
- func (df *DataFrame) HasColumn(name string) bool
- func (df *DataFrame) Head(n int) string
- func (df *DataFrame) Iloc(positions ...int) *DataFrame
- func (df *DataFrame) Index() core.Index
- func (df *DataFrame) Interpolate(method string, opts ...InterpolateOption) *DataFrame
- func (df *DataFrame) IsNA() (*DataFrame, error)
- func (df *DataFrame) Join(other *DataFrame, joinType, onCol string, opts ...JoinOption) (*DataFrame, error)
- func (df *DataFrame) Loc(labels ...any) (*DataFrame, error)
- func (df *DataFrame) Map(fn func(any) any) *DataFrame
- func (df *DataFrame) Max(cols ...string) (map[string]any, error)
- func (df *DataFrame) Mean(cols ...string) (map[string]float64, error)
- func (df *DataFrame) Median(cols ...string) (map[string]float64, error)
- func (df *DataFrame) Melt(idVars, valueVars []string, varName, valueName string) (*DataFrame, error)
- func (df *DataFrame) Merge(other *DataFrame, joinType string, leftOn, rightOn []string, ...) (*DataFrame, error)
- func (df *DataFrame) Min(cols ...string) (map[string]any, error)
- func (df *DataFrame) Ncols() int
- func (df *DataFrame) NotNA() (*DataFrame, error)
- func (df *DataFrame) Nrows() int
- func (df *DataFrame) Pivot(index, columns, values string) (*DataFrame, error)
- func (df *DataFrame) Rename(mapping map[string]string) *DataFrame
- func (df *DataFrame) Rolling(size int, opts ...WindowOption) *Window
- func (df *DataFrame) Select(cols ...string) *DataFrame
- func (df *DataFrame) SetIndex(idx core.Index) error
- func (df *DataFrame) Shape() (int, int)
- func (df *DataFrame) SliceRows(start, end int) *DataFrame
- func (df *DataFrame) Sort(col string, order core.Order, opts ...SortOption) *DataFrame
- func (df *DataFrame) SortIndex(order core.Order, opts ...SortOption) *DataFrame
- func (df *DataFrame) SortMulti(cols []string, orders []core.Order, opts ...SortOption) *DataFrame
- func (df *DataFrame) Stack() (*DataFrame, error)
- func (df *DataFrame) Std(cols ...string) (map[string]float64, error)
- func (df *DataFrame) String() string
- func (df *DataFrame) Sum(cols ...string) (map[string]float64, error)
- func (df *DataFrame) Tail(n int) string
- func (df *DataFrame) Transpose() (*DataFrame, error)
- func (df *DataFrame) Unstack(rowCol, colCol, valueCol string) (*DataFrame, error)
- func (df *DataFrame) Var(cols ...string) (map[string]float64, error)
- func (df *DataFrame) WithColumn(name string, s *series.Series[any]) *DataFrame
- type DatetimeIndex
- type DropNAOption
- type DropNAOptions
- type GroupBy
- func (gb *GroupBy) Agg(ops map[string]string) (*DataFrame, error)
- func (gb *GroupBy) AggMultiple(ops map[string][]string) (*DataFrame, error)
- func (gb *GroupBy) Apply(fn func(*DataFrame) any) (*DataFrame, error)
- func (gb *GroupBy) Count() (*DataFrame, error)
- func (gb *GroupBy) Size() (*DataFrame, error)
- type InterpolateOption
- type InterpolateOptions
- type JoinOption
- type JoinOptions
- type RangeIndex
- type Row
- type SortOption
- type SortOptions
- type StringIndex
- type Window
- func (w *Window) Max(col string) (*series.Series[any], error)
- func (w *Window) Mean(col string) (*series.Series[float64], error)
- func (w *Window) Min(col string) (*series.Series[any], error)
- func (w *Window) Std(col string) (*series.Series[float64], error)
- func (w *Window) Sum(col string) (*series.Series[float64], error)
- type WindowOption
- type WindowOptions
Constants ¶
const (
	AggSum    = "sum"    // Sum of values
	AggMean   = "mean"   // Arithmetic mean
	AggMedian = "median" // 50th percentile
	AggStd    = "std"    // Sample standard deviation
	AggVar    = "var"    // Sample variance
	AggMin    = "min"    // Minimum value
	AggMax    = "max"    // Maximum value
	AggCount  = "count"  // Count non-null values
	AggSize   = "size"   // Count all values (including nulls)
	AggFirst  = "first"  // First value in group
	AggLast   = "last"   // Last value in group
)
Aggregation function names
const (
	JoinInner = "inner" // Intersection
	JoinLeft  = "left"  // All from left, matching from right
	JoinRight = "right" // All from right, matching from left
	JoinOuter = "outer" // Union (full outer join)
	JoinCross = "cross" // Cartesian product
)
Join types
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type AggOption ¶
type AggOption func(*AggOptions)
AggOption is a functional option for aggregations.
type AggOptions ¶
type AggOptions struct {
// contains filtered or unexported fields
}
AggOptions configures aggregation behavior.
type DataFrame ¶
type DataFrame struct {
// contains filtered or unexported fields
}
DataFrame is a two-dimensional, size-mutable, tabular data structure. Operations return new DataFrames (copy-on-write semantics) unless noted.
func FromRecords ¶
FromRecords creates a DataFrame from a slice of maps (records). Each map represents a row with column names as keys.
func New ¶
New creates a new DataFrame from a map of column names to data slices. All slices must have the same length. It automatically infers data types and creates a default RangeIndex.
func (*DataFrame) Apply ¶
Apply applies a function to each row and adds the result as a new column. The function receives a Row and returns a single value.
func (*DataFrame) ApplyColumn ¶
ApplyColumn applies a function to each value in a column.
func (*DataFrame) ApplyElement ¶
ApplyElement applies a function to selected columns element-wise. The function receives a map of column values for the current row.
func (*DataFrame) Column ¶
Column returns the Series for the given column name. Returns an error if the column doesn't exist.
func (*DataFrame) Describe ¶
Describe generates descriptive statistics for numeric columns. Returns a DataFrame with statistics: count, mean, std, min, 25%, 50%, 75%, max.
func (*DataFrame) DropNA ¶
func (df *DataFrame) DropNA(opts ...DropNAOption) *DataFrame
DropNA returns a new DataFrame with rows containing null values removed.
func (*DataFrame) FillNA ¶
FillNA returns a new DataFrame with null values replaced by the given value.
func (*DataFrame) FillNAColumn ¶
FillNAColumn returns a new DataFrame with nulls in a specific column replaced.
func (*DataFrame) FillNADict ¶
FillNADict returns a new DataFrame with nulls replaced using a column-specific map.
func (*DataFrame) Filter ¶
Filter returns a new DataFrame containing only rows for which the predicate returns true. This creates a copy of the data for filtered rows.
func (*DataFrame) HasColumn ¶
HasColumn returns true if the DataFrame has a column with the given name.
func (*DataFrame) Interpolate ¶
func (df *DataFrame) Interpolate(method string, opts ...InterpolateOption) *DataFrame
Interpolate fills null values using interpolation. method can be "linear", "ffill", or "bfill".
func (*DataFrame) Join ¶
func (df *DataFrame) Join(other *DataFrame, joinType, onCol string, opts ...JoinOption) (*DataFrame, error)
Join performs a join operation on a single column.
func (*DataFrame) Map ¶
Map applies a function to each value in the DataFrame. Returns a new DataFrame with all values transformed.
func (*DataFrame) Melt ¶
func (df *DataFrame) Melt(idVars, valueVars []string, varName, valueName string) (*DataFrame, error)
Melt transforms wide format to long format.
  - idVars: columns to use as identifier variables
  - valueVars: columns to unpivot (if empty, all non-id columns are used)
  - varName: name for the variable column
  - valueName: name for the value column
func (*DataFrame) Merge ¶
func (df *DataFrame) Merge(other *DataFrame, joinType string, leftOn, rightOn []string, opts ...JoinOption) (*DataFrame, error)
Merge performs a join operation on multiple columns. leftOn and rightOn specify the join keys for left and right DataFrames.
func (*DataFrame) NotNA ¶
NotNA returns a DataFrame of boolean values indicating non-null positions.
func (*DataFrame) Pivot ¶
Pivot transforms long format to wide format.
  - index: column to use as the row index
  - columns: column whose values become the new column names
  - values: column to use for cell values
func (*DataFrame) Rolling ¶
func (df *DataFrame) Rolling(size int, opts ...WindowOption) *Window
Rolling creates a rolling window.
func (*DataFrame) Select ¶
Select returns a new DataFrame containing only the specified columns. This is a view operation (zero-copy) - the underlying data is shared.
func (*DataFrame) SliceRows ¶
SliceRows returns a new DataFrame with rows from start (inclusive) to end (exclusive).
func (*DataFrame) Sort ¶
Sort sorts the DataFrame by a single column. Returns a new DataFrame with rows reordered.
func (*DataFrame) SortIndex ¶
func (df *DataFrame) SortIndex(order core.Order, opts ...SortOption) *DataFrame
SortIndex sorts the DataFrame by its index.
func (*DataFrame) SortMulti ¶
SortMulti sorts the DataFrame by multiple columns. Columns are sorted in order of priority (first column is primary sort key).
func (*DataFrame) Stack ¶
Stack pivots columns into rows. Rather than producing a multi-level index, this implementation keeps things simple and returns a long-form DataFrame.
func (*DataFrame) Sum ¶
Sum calculates the sum of numeric columns. Returns a map of column names to their sums.
type DatetimeIndex ¶
type DatetimeIndex struct {
// contains filtered or unexported fields
}
DatetimeIndex is a time-based index for time series data.
func NewDatetimeIndex ¶
func NewDatetimeIndex(times []time.Time, tz *time.Location) *DatetimeIndex
NewDatetimeIndex creates a new DatetimeIndex.
func (*DatetimeIndex) Copy ¶
func (di *DatetimeIndex) Copy() core.Index
Copy returns a copy of the index.
func (*DatetimeIndex) Get ¶
func (di *DatetimeIndex) Get(pos int) any
Get returns the label at the given position.
func (*DatetimeIndex) Len ¶
func (di *DatetimeIndex) Len() int
Len returns the number of elements in the index.
type DropNAOption ¶
type DropNAOption func(*DropNAOptions)
DropNAOption is a functional option for DropNA.
func Subset ¶
func Subset(cols []string) DropNAOption
Subset specifies columns to consider for null checking.
func Thresh ¶
func Thresh(n int) DropNAOption
Thresh sets the minimum number of non-null values required to keep a row.
type DropNAOptions ¶
type DropNAOptions struct {
// contains filtered or unexported fields
}
DropNAOptions configures DropNA behavior.
type GroupBy ¶
type GroupBy struct {
// contains filtered or unexported fields
}
GroupBy represents a grouped DataFrame for aggregation operations.
func (*GroupBy) Agg ¶
Agg performs a single aggregation per column. ops maps column names to aggregation function names.
Example: {"sales": "sum", "qty": "mean"}
func (*GroupBy) AggMultiple ¶
AggMultiple performs multiple aggregations per column. ops maps column names to slices of aggregation function names.
Example: {"sales": ["sum", "mean", "std"], "qty": ["min", "max"]}
Result columns: [group_keys..., sales_sum, sales_mean, sales_std, qty_min, qty_max]
type InterpolateOption ¶
type InterpolateOption func(*InterpolateOptions)
InterpolateOption is a functional option for Interpolate.
func Limit ¶
func Limit(n int) InterpolateOption
Limit sets the maximum number of consecutive nulls to fill.
type InterpolateOptions ¶
type InterpolateOptions struct {
// contains filtered or unexported fields
}
InterpolateOptions configures interpolation behavior.
type JoinOption ¶
type JoinOption func(*JoinOptions)
JoinOption is a functional option for joins.
func WithIndicator ¶
func WithIndicator(colName string) JoinOption
WithIndicator adds a column indicating the source of each row.
func WithSuffixes ¶
func WithSuffixes(left, right string) JoinOption
WithSuffixes sets custom suffixes for overlapping columns.
type JoinOptions ¶
type JoinOptions struct {
// contains filtered or unexported fields
}
JoinOptions configures join behavior.
type RangeIndex ¶
type RangeIndex struct {
// contains filtered or unexported fields
}
RangeIndex is an integer-based index defined by start, stop, and step. It represents the labels start, start+step, start+2*step, ..., up to but not including stop.
func NewRangeIndex ¶
func NewRangeIndex(start, stop, step int) *RangeIndex
NewRangeIndex creates a new RangeIndex.
func (*RangeIndex) Get ¶
func (ri *RangeIndex) Get(pos int) any
Get returns the label at the given position.
func (*RangeIndex) Len ¶
func (ri *RangeIndex) Len() int
Len returns the number of elements in the index.
type Row ¶
type Row struct {
// contains filtered or unexported fields
}
Row represents a single row in a DataFrame, as passed to row-wise callbacks such as those of Filter and Apply.
type SortOptions ¶
type SortOptions struct {
// contains filtered or unexported fields
}
SortOptions configures sort behavior.
type StringIndex ¶
type StringIndex struct {
// contains filtered or unexported fields
}
StringIndex is a string-based index with a lookup map for fast label-based access.
func NewStringIndex ¶
func NewStringIndex(labels []string) *StringIndex
NewStringIndex creates a new StringIndex.
func (*StringIndex) Copy ¶
func (si *StringIndex) Copy() core.Index
Copy returns a copy of the index.
func (*StringIndex) Get ¶
func (si *StringIndex) Get(pos int) any
Get returns the label at the given position.
func (*StringIndex) Len ¶
func (si *StringIndex) Len() int
Len returns the number of elements in the index.
type Window ¶
type Window struct {
// contains filtered or unexported fields
}
Window represents a window over a DataFrame, as created by Rolling, Expanding, or EWM.
type WindowOption ¶
type WindowOption func(*WindowOptions)
WindowOption is a functional option for windows.
func MinPeriods ¶
func MinPeriods(n int) WindowOption
MinPeriods sets the minimum number of observations in the window.
type WindowOptions ¶
type WindowOptions struct {
// contains filtered or unexported fields
}
WindowOptions configures window behavior.