dataset

package
v0.5.0-alpha.4 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 15, 2025 License: Apache-2.0 Imports: 15 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func LoadDataFromBuiltIn

func LoadDataFromBuiltIn(dataSetName string) (*Dataset, *Dataset, error)

Types

type CFSplit

// CFSplit is the dataset split for collaborative filtering.
type CFSplit interface {
	// CountUsers returns the number of users.
	CountUsers() int
	// CountItems returns the number of items.
	CountItems() int
	// CountFeedback returns the number of (positive) feedback entries.
	CountFeedback() int
	// GetUserDict returns the frequency dictionary of users.
	GetUserDict() *FreqDict
	// GetItemDict returns the frequency dictionary of items.
	GetItemDict() *FreqDict
	// GetUserFeedback returns the (positive) feedback of users.
	GetUserFeedback() [][]int32
	// GetItemFeedback returns the (positive) feedback of items.
	GetItemFeedback() [][]int32
	// SampleUserNegatives samples negative feedback for users.
	// NOTE(review): excludeSet presumably names a split whose feedback must not
	// be drawn as negatives, and numCandidates presumably is the number of
	// negatives sampled per user — confirm against the implementation.
	SampleUserNegatives(excludeSet CFSplit, numCandidates int) [][]int32
}

CFSplit is the dataset split for collaborative filtering.

type CTRSplit

// CTRSplit is the dataset split for click-through rate prediction.
//
// NOTE(review): the per-method notes below are inferred from the method names
// and signatures only; confirm each one against the implementations.
type CTRSplit interface {
	// Count presumably reports the number of samples in the split.
	Count() int
	// CountUsers presumably reports the number of users.
	CountUsers() int
	// CountItems presumably reports the number of items.
	CountItems() int
	// CountUserLabels presumably reports the number of distinct user labels.
	CountUserLabels() int
	// CountItemLabels presumably reports the number of distinct item labels.
	CountItemLabels() int
	// CountContextLabels presumably reports the number of distinct context labels.
	CountContextLabels() int
	// CountPositive presumably reports the number of positive samples.
	CountPositive() int
	// CountNegative presumably reports the number of negative samples.
	CountNegative() int
	// GetIndex returns the base.UnifiedIndex associated with the split.
	GetIndex() base.UnifiedIndex
	// GetTarget presumably returns the target value of the i-th sample.
	GetTarget(i int) float32
	// Get presumably returns the feature indices, feature values, and target of
	// the i-th sample, in that order — TODO confirm.
	Get(i int) ([]int32, []float32, float32)
}

CTRSplit is the dataset split for click-through rate prediction.

type Dataset

type Dataset struct {
	// contains filtered or unexported fields
}

func NewDataset

func NewDataset(timestamp time.Time, userCount, itemCount int) *Dataset

func (*Dataset) AddFeedback

func (d *Dataset) AddFeedback(userId, itemId string)

func (*Dataset) AddItem

func (d *Dataset) AddItem(item data.Item)

func (*Dataset) AddUser

func (d *Dataset) AddUser(user data.User)

func (*Dataset) CountFeedback

func (d *Dataset) CountFeedback() int

func (*Dataset) CountItems

func (d *Dataset) CountItems() int

func (*Dataset) CountUsers

func (d *Dataset) CountUsers() int

func (*Dataset) GetCategories

func (d *Dataset) GetCategories() []string

func (*Dataset) GetItemColumnValuesIDF

func (d *Dataset) GetItemColumnValuesIDF() []float32

func (*Dataset) GetItemDict

func (d *Dataset) GetItemDict() *FreqDict

func (*Dataset) GetItemFeedback

func (d *Dataset) GetItemFeedback() [][]int32

func (*Dataset) GetItemIDF

func (d *Dataset) GetItemIDF() []float32

GetItemIDF returns the IDF of items.

IDF(i) = log(U/freq(i))

U is the number of users. freq(i) is the frequency of item i in all feedback.

func (*Dataset) GetItems

func (d *Dataset) GetItems() []data.Item

func (*Dataset) GetTimestamp

func (d *Dataset) GetTimestamp() time.Time

func (*Dataset) GetUserColumnValuesIDF

func (d *Dataset) GetUserColumnValuesIDF() []float32

func (*Dataset) GetUserDict

func (d *Dataset) GetUserDict() *FreqDict

func (*Dataset) GetUserFeedback

func (d *Dataset) GetUserFeedback() [][]int32

func (*Dataset) GetUserIDF

func (d *Dataset) GetUserIDF() []float32

GetUserIDF returns the IDF of users.

IDF(u) = log(I/freq(u))

I is the number of items. freq(u) is the frequency of user u in all feedback.

func (*Dataset) GetUsers

func (d *Dataset) GetUsers() []data.User

func (*Dataset) SampleUserNegatives

func (d *Dataset) SampleUserNegatives(excludeSet CFSplit, numCandidates int) [][]int32

func (*Dataset) SplitCF

func (d *Dataset) SplitCF(numTestUsers int, seed int64) (CFSplit, CFSplit)

SplitCF splits the dataset by the user-leave-one-out method. The argument `numTestUsers` determines the number of users in the test set. If numTestUsers is greater than or equal to the total number of users, or numTestUsers <= 0, all users are included in the test set.

type FreqDict

type FreqDict struct {
	// contains filtered or unexported fields
}

func NewFreqDict

func NewFreqDict() (d *FreqDict)

func (*FreqDict) Add

func (d *FreqDict) Add(s string) (y int32)

func (*FreqDict) AddNoCount

func (d *FreqDict) AddNoCount(s string) (y int32)

func (*FreqDict) Count

func (d *FreqDict) Count() int32

func (*FreqDict) Freq

func (d *FreqDict) Freq(id int32) int32

func (*FreqDict) Id

func (d *FreqDict) Id(s string) int32

func (*FreqDict) String

func (d *FreqDict) String(id int32) (s string, ok bool)

func (*FreqDict) ToIndex

func (d *FreqDict) ToIndex() *base.Index

type ID

type ID int32

type Labels

type Labels struct {
	// contains filtered or unexported fields
}

func NewLabels

func NewLabels() *Labels

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL