movielens

package
v0.0.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Sep 23, 2022 License: AGPL-3.0 Imports: 14 Imported by: 0

README

MovieLens Example

Original Data: MovieLens 100k

SQLite DB file: movielens.db.zip

To run the tests, you need to download the SQLite DB file and put it in the current directory.

# download and unzip the SQLite DB file
wget https://github.com/auxten/edgeRec/files/9482338/movielens.db.zip && unzip movielens.db.zip

SQL that splits the ratings into a training set and a test set by user (80% of the users for training, 20% for testing):


-- Split the MovieLens 100k ratings into train and test sets by user.
-- Expects the ratings and movies tables to be populated already.

-- import data from csv, do it with any tool

select count(distinct userId) from ratings; -- 610 users

-- one row per user that has rated something; is_train = 0 marks a test user
-- until the update below promotes it to a train user
create table user as
select distinct userId, 0 as is_train
from ratings;

-- choose a random 80% of the users as train users
-- the limit is derived from the user count instead of being hard-coded:
-- integer arithmetic rounds down, and 610 * 4 / 5 = 488 for MovieLens 100k
update user
set is_train = 1
where userId in
      (select userId
       from user -- already holds the distinct userIds of ratings
       order by random()
       limit (select count(*) * 4 / 5 from user));

-- number of test users
select count(*) from user where is_train != 1;

-- split train and test set of movielens ratings
-- the filter on u.is_train drops any row without a matching user,
-- so an inner join states what the query actually returns
create table ratings_train as
select r.userId, r.movieId, r.rating, r.timestamp
from ratings r
         inner join user u on r.userId = u.userId
where u.is_train = 1;

create table ratings_test as
select r.userId, r.movieId, r.rating, r.timestamp
from ratings r
         inner join user u on r.userId = u.userId
where u.is_train = 0;

-- sanity check: the train and test counts should add up to the total
select count(*) from ratings_train;
select count(*) from ratings_test;
select count(*) from ratings;

select count(distinct movieId) from movies;

The DIN way to split dataset

Another way to split the MovieLens-20m dataset by userId is described in the Deep Interest Network paper.

MovieLens 20m

Related SQL:

-- Schema and user-based train/test split for the MovieLens 20m dataset:
-- 100000 randomly chosen users form the train set, the rest the test set.

create table movies
(
    movieId INTEGER,
    title   TEXT,
    genres  TEXT
);

create table ratings
(
    userId    INTEGER,
    movieId   INTEGER,
    rating    FLOAT,
    timestamp INTEGER
);

create table tags
(
    userId    INTEGER,
    movieId   INTEGER,
    tag       TEXT,
    timestamp INTEGER
);

-- import data from csv, do it with any tool

select count(distinct userId) from ratings; -- 138,493 users

-- one row per user that has rated something; is_train = 0 marks a test user
-- until the update below promotes it to a train user
create table user as
select distinct userId, 0 as is_train
from ratings;

-- choose 100000 random users as train users
update user
set is_train = 1
where userId in
      (select userId
       from user -- already holds the distinct userIds, no need to rescan ratings
       order by random()
       limit 100000);

select count(*) from user where is_train != 1; -- 38,493 test users

-- split train and test set of movielens-20m ratings
-- the filter on u.is_train drops any row without a matching user,
-- so an inner join states what the query actually returns
create table ratings_train as
select r.userId, r.movieId, r.rating, r.timestamp
from ratings r
         inner join user u on r.userId = u.userId
where u.is_train = 1;

create table ratings_test as
select r.userId, r.movieId, r.rating, r.timestamp
from ratings r
         inner join user u on r.userId = u.userId
where u.is_train = 0;

-- sanity check: the train and test counts add up to the total
-- (the exact split depends on which users were drawn at random)
select count(*) from ratings_train; --14,393,526
select count(*) from ratings_test;  --5,606,737
select count(*) from ratings;       --20,000,263

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func BinarizeLabel

func BinarizeLabel(rating float64) float64

Types

type RecSysImpl

type RecSysImpl struct {
	DataPath  string
	SampleCnt int
	Neural    base.Predicter
	// contains filtered or unexported fields
}

func (*RecSysImpl) GetDashboardOverview

func (recSys *RecSysImpl) GetDashboardOverview(ctx context.Context) (res rcmd.DashboardOverviewResult, err error)

func (*RecSysImpl) GetItemFeature

func (recSys *RecSysImpl) GetItemFeature(ctx context.Context, itemId int) (tensor rcmd.Tensor, err error)

func (*RecSysImpl) GetItemsFeatureOverview

func (recSys *RecSysImpl) GetItemsFeatureOverview(ctx context.Context, offset, size int, _ map[string][]string) (res rcmd.ItemOverviewResult, err error)

func (*RecSysImpl) GetUserFeature

func (recSys *RecSysImpl) GetUserFeature(ctx context.Context, userId int) (tensor rcmd.Tensor, err error)

func (*RecSysImpl) GetUsersFeatureOverview

func (recSys *RecSysImpl) GetUsersFeatureOverview(ctx context.Context, offset, size int, _ map[string][]string) (res rcmd.UserItemOverviewResult, err error)

func (*RecSysImpl) ItemSeqGenerator

func (recSys *RecSysImpl) ItemSeqGenerator(ctx context.Context) (ret <-chan string, err error)

func (*RecSysImpl) PreTrain

func (recSys *RecSysImpl) PreTrain(ctx context.Context) (err error)

func (*RecSysImpl) SampleGenerator

func (recSys *RecSysImpl) SampleGenerator(_ context.Context) (ret <-chan rcmd.Sample, err error)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL