Documentation
¶
Overview ¶
Package encoders provides categorical encoding transformers.
Index ¶
- type FrequencyEncoder
- func (f *FrequencyEncoder) Fit(df *dataframe.DataFrame, _ ...string) error
- func (f *FrequencyEncoder) FitTransform(df *dataframe.DataFrame, target ...string) (*dataframe.DataFrame, error)
- func (f *FrequencyEncoder) GetMapping() map[string]map[string]float64
- func (f *FrequencyEncoder) IsFitted() bool
- func (f *FrequencyEncoder) Transform(df *dataframe.DataFrame) (*dataframe.DataFrame, error)
- type LabelEncoder
- func (l *LabelEncoder) Fit(df *dataframe.DataFrame, _ ...string) error
- func (l *LabelEncoder) FitTransform(df *dataframe.DataFrame, target ...string) (*dataframe.DataFrame, error)
- func (l *LabelEncoder) GetMapping() map[string]int64
- func (l *LabelEncoder) InverseTransform(df *dataframe.DataFrame) (*dataframe.DataFrame, error)
- func (l *LabelEncoder) IsFitted() bool
- func (l *LabelEncoder) Transform(df *dataframe.DataFrame) (*dataframe.DataFrame, error)
- type OneHotEncoder
- func (o *OneHotEncoder) Fit(df *dataframe.DataFrame, _ ...string) error
- func (o *OneHotEncoder) FitTransform(df *dataframe.DataFrame, target ...string) (*dataframe.DataFrame, error)
- func (o *OneHotEncoder) GetCategories() map[string][]string
- func (o *OneHotEncoder) IsFitted() bool
- func (o *OneHotEncoder) Transform(df *dataframe.DataFrame) (*dataframe.DataFrame, error)
- type OrdinalEncoder
- func (o *OrdinalEncoder) Fit(df *dataframe.DataFrame, _ ...string) error
- func (o *OrdinalEncoder) FitTransform(df *dataframe.DataFrame, target ...string) (*dataframe.DataFrame, error)
- func (o *OrdinalEncoder) GetMapping() map[string]int64
- func (o *OrdinalEncoder) IsFitted() bool
- func (o *OrdinalEncoder) Transform(df *dataframe.DataFrame) (*dataframe.DataFrame, error)
- type TargetEncoder
- func (t *TargetEncoder) Fit(df *dataframe.DataFrame, target ...string) error
- func (t *TargetEncoder) FitTransform(df *dataframe.DataFrame, target ...string) (*dataframe.DataFrame, error)
- func (t *TargetEncoder) GetMapping() map[string]map[string]float64
- func (t *TargetEncoder) IsFitted() bool
- func (t *TargetEncoder) Transform(df *dataframe.DataFrame) (*dataframe.DataFrame, error)
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type FrequencyEncoder ¶
type FrequencyEncoder struct {
// Columns to encode
Columns []string
// Normalize converts counts to frequencies (0-1 range). Default: false
Normalize bool
// contains filtered or unexported fields
}
FrequencyEncoder encodes each category by how often it occurs: as a raw count by default, or as a relative frequency in the 0-1 range when Normalize is true. More frequent categories get higher values.
func NewFrequencyEncoder ¶
func NewFrequencyEncoder(columns []string) *FrequencyEncoder
NewFrequencyEncoder creates a new FrequencyEncoder.
func (*FrequencyEncoder) Fit ¶
func (f *FrequencyEncoder) Fit(df *dataframe.DataFrame, _ ...string) error
Fit learns the frequency of each category.
func (*FrequencyEncoder) FitTransform ¶
func (f *FrequencyEncoder) FitTransform(df *dataframe.DataFrame, target ...string) (*dataframe.DataFrame, error)
FitTransform fits the encoder and transforms the data in one step.
func (*FrequencyEncoder) GetMapping ¶
func (f *FrequencyEncoder) GetMapping() map[string]map[string]float64
GetMapping returns the category to frequency mapping.
func (*FrequencyEncoder) IsFitted ¶
func (f *FrequencyEncoder) IsFitted() bool
IsFitted returns true if the encoder has been fitted.
type LabelEncoder ¶
type LabelEncoder struct {
// Column to encode
Column string
// contains filtered or unexported fields
}
LabelEncoder encodes categorical variables as integers. Each unique category is assigned an integer from 0 to n_categories-1.
func NewLabelEncoder ¶
func NewLabelEncoder(column string) *LabelEncoder
NewLabelEncoder creates a new LabelEncoder.
func (*LabelEncoder) Fit ¶
func (l *LabelEncoder) Fit(df *dataframe.DataFrame, _ ...string) error
Fit learns the mapping from categories to integers.
func (*LabelEncoder) FitTransform ¶
func (l *LabelEncoder) FitTransform(df *dataframe.DataFrame, target ...string) (*dataframe.DataFrame, error)
FitTransform fits the encoder and transforms the data in one step.
func (*LabelEncoder) GetMapping ¶
func (l *LabelEncoder) GetMapping() map[string]int64
GetMapping returns the category to integer mapping.
func (*LabelEncoder) InverseTransform ¶
func (l *LabelEncoder) InverseTransform(df *dataframe.DataFrame) (*dataframe.DataFrame, error)
InverseTransform converts encoded integers back to categories.
func (*LabelEncoder) IsFitted ¶
func (l *LabelEncoder) IsFitted() bool
IsFitted returns true if the encoder has been fitted.
type OneHotEncoder ¶
type OneHotEncoder struct {
// Columns to encode
Columns []string
// DropFirst drops the first category to avoid multicollinearity (dummy variable trap).
// Default: false
DropFirst bool
// HandleUnknown specifies how to handle unknown categories during transform.
// Options: "error" (default), "ignore"
HandleUnknown string
// contains filtered or unexported fields
}
OneHotEncoder encodes categorical variables as binary vectors. Each category becomes a separate binary column.
func NewOneHotEncoder ¶
func NewOneHotEncoder(columns []string) *OneHotEncoder
NewOneHotEncoder creates a new OneHotEncoder.
func (*OneHotEncoder) Fit ¶
func (o *OneHotEncoder) Fit(df *dataframe.DataFrame, _ ...string) error
Fit learns the unique categories for each column.
func (*OneHotEncoder) FitTransform ¶
func (o *OneHotEncoder) FitTransform(df *dataframe.DataFrame, target ...string) (*dataframe.DataFrame, error)
FitTransform fits the encoder and transforms the data in one step.
func (*OneHotEncoder) GetCategories ¶
func (o *OneHotEncoder) GetCategories() map[string][]string
GetCategories returns the learned categories for each column.
func (*OneHotEncoder) IsFitted ¶
func (o *OneHotEncoder) IsFitted() bool
IsFitted returns true if the encoder has been fitted.
type OrdinalEncoder ¶
type OrdinalEncoder struct {
// Column to encode
Column string
// Categories in order (e.g., ["low", "medium", "high"])
Categories []string
// contains filtered or unexported fields
}
OrdinalEncoder encodes categorical variables with ordinal relationships. Unlike LabelEncoder, the user specifies the order of categories.
func NewOrdinalEncoder ¶
func NewOrdinalEncoder(column string, categories []string) *OrdinalEncoder
NewOrdinalEncoder creates a new OrdinalEncoder with specified category order.
func (*OrdinalEncoder) Fit ¶
func (o *OrdinalEncoder) Fit(df *dataframe.DataFrame, _ ...string) error
Fit builds the mapping from the specified category order.
func (*OrdinalEncoder) FitTransform ¶
func (o *OrdinalEncoder) FitTransform(df *dataframe.DataFrame, target ...string) (*dataframe.DataFrame, error)
FitTransform fits the encoder and transforms the data in one step.
func (*OrdinalEncoder) GetMapping ¶
func (o *OrdinalEncoder) GetMapping() map[string]int64
GetMapping returns the category to integer mapping.
func (*OrdinalEncoder) IsFitted ¶
func (o *OrdinalEncoder) IsFitted() bool
IsFitted returns true if the encoder has been fitted.
type TargetEncoder ¶
type TargetEncoder struct {
// Columns to encode
Columns []string
// Target column name (provided during Fit)
Target string
// Smooth is the smoothing factor. Higher values give more weight to the global mean.
// Default: 1.0
Smooth float64
// contains filtered or unexported fields
}
TargetEncoder encodes categories using the mean of the target variable, which is useful for high-cardinality categorical features. To prevent overfitting, each category's mean is smoothed toward the global mean: encoded = (count * mean + smooth * global_mean) / (count + smooth), where smooth is the Smooth field.
func NewTargetEncoder ¶
func NewTargetEncoder(columns []string) *TargetEncoder
NewTargetEncoder creates a new TargetEncoder.
func (*TargetEncoder) Fit ¶
func (t *TargetEncoder) Fit(df *dataframe.DataFrame, target ...string) error
Fit learns the target mean for each category.
func (*TargetEncoder) FitTransform ¶
func (t *TargetEncoder) FitTransform(df *dataframe.DataFrame, target ...string) (*dataframe.DataFrame, error)
FitTransform fits the encoder and transforms the data in one step.
func (*TargetEncoder) GetMapping ¶
func (t *TargetEncoder) GetMapping() map[string]map[string]float64
GetMapping returns the category to encoded value mapping.
func (*TargetEncoder) IsFitted ¶
func (t *TargetEncoder) IsFitted() bool
IsFitted returns true if the encoder has been fitted.