This repository has been archived by the owner on Sep 17, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathelastic_net.py
74 lines (51 loc) · 2.39 KB
/
elastic_net.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import numpy as np
from scipy import sparse as sps
from SLIM_Elastic_Net_Structured.SLIM_Elastic_Net_Structured import SLIM_Elastic_Net_Structured
# Disabled preprocessing code, kept as a bare string literal so it never runs.
# As written it would parse the MovieLens movies.dat and ratings.dat files,
# build the kind-by-movie matrix `icm` and the user-by-movie ratings matrix
# `urm`, and save them to files/icm.npz and files/urm.npz.
# NOTE(review): before re-enabling, confirm the following:
#   - `pd` is used throughout but pandas is not imported in this file;
#   - the body of the `for i in range(...)` loop carries no indentation here;
#   - `astype('category', categories=...)` is presumably rejected by recent
#     pandas releases -- verify against the installed version;
#   - nothing here writes train.npz, test.npz or the *_list.npy files that
#     the live code further down loads.
'''
movies = [i.strip().split("::") for i in open('/home/luca/Scaricati/ml-10M100K/movies.dat', 'r').readlines()]
movies_df = pd.DataFrame(movies, columns = ['MovieID', 'Title', 'Kind'], dtype = int)
tempKind, tempID = [], []
for i in range(len(movies_df.MovieID)):
temp = movies_df.Kind[i].split("|")
tempKind.extend(temp)
tempID.extend([movies_df.MovieID[i] for y in range(len(temp))])
movies_df = pd.DataFrame({"MovieID": tempID, "Kind": tempKind}).dropna(axis=0)
print('Starting create unique list')
movie_list = list(movies_df.MovieID.unique())
kind_list = list(movies_df.Kind.unique())
print(len(movie_list))
print(kind_list)
print('Starting create rows and cols')
rows = list()
cols = list()
cols = movies_df.MovieID.astype('category', categories=movie_list).cat.codes
rows = movies_df.Kind.astype('category', categories=kind_list).cat.codes
data = np.ones(len(rows)).squeeze()
icm = sps.csc_matrix((data, (rows, cols)), shape=(len(kind_list),len(movie_list)))
ratings = [i.strip().split("::") for i in open('/home/luca/Scaricati/ml-10M100K/ratings.dat', 'r').readlines()]
ratings_df = pd.DataFrame(ratings, columns = ['UserID', 'MovieID', 'Ratings', 'Timestamp'], dtype = int).dropna(axis=0)
print('Starting create unique list for urm')
user_list = list(ratings_df.UserID.unique())
print(len(user_list))
cols = list()
rows = list()
cols = ratings_df.MovieID.astype('category', categories=movie_list).cat.codes
rows = ratings_df.UserID.astype('category', categories=user_list).cat.codes
data = ratings_df.Ratings.astype(float)
urm = sps.csc_matrix((data, (rows, cols)), shape=(len(user_list),len(movie_list)), dtype=np.float32)
sps.save_npz("files/urm.npz", urm)
sps.save_npz("files/icm.npz", icm)
'''
# Cached lookup arrays, read in the order movie -> user -> kind.
# They are loaded here but not referenced again anywhere in this script.
movie_list, user_list, kind_list = (
    np.load(npy_path)
    for npy_path in (
        'files/movies_list.npy',
        'files/user_list.npy',
        'files/kind_list.npy',
    )
)

# Sparse matrices stored in .npz format: the train and test matrices plus icm.
train, test, icm = (
    sps.load_npz(npz_path)
    for npz_path in ("files/train.npz", "files/test.npz", "files/icm.npz")
)
print(type(icm))

# Build the model from icm and the training matrix, fit it for one epoch,
# then print whatever evaluateRecommendations returns for the test matrix.
el = SLIM_Elastic_Net_Structured(icm, train)
el.fit(epochs=1)
print(el.evaluateRecommendations(test))
# Metrics dict kept for reference as a bare string literal (never executed).
# Presumably the output printed by a previous run of the evaluation above --
# TODO confirm which data split and settings produced these numbers.
'''
{'AUC': 0.14041332760923478, 'precision': 0.06498282770463865, 'recall': 0.0029702795654990254, 'map': 0.03951178210265244, 'NDCG': 0.008137593858484806, 'MRR': 0.14227723716847893}
'''