Initial commit 906cf74 (0 parents): 9 changed files with 595 additions and 0 deletions.
@@ -0,0 +1,51 @@
import numpy as np
import scipy.sparse as sp
import torch.utils.data as data


class BPRData(data.Dataset):
    """Dataset for BPR training. Labels are only needed during training,
    so negative items are drawn in ng_sample()."""

    def __init__(self, data, num_item, train_mat=None, num_ng=0, is_training=None):
        super(BPRData, self).__init__()
        self.data = np.array(data)
        self.num_item = num_item
        self.train_mat = train_mat
        self.num_ng = num_ng
        self.is_training = is_training

    def ng_sample(self):
        assert self.is_training, 'no need to sample when testing'
        tmp_trainMat = self.train_mat.todok()
        length = self.data.shape[0]
        # Draw one negative item per interaction, resampling whenever the
        # drawn (user, item) pair is an observed training interaction.
        self.neg_data = np.random.randint(low=0, high=self.num_item, size=length)
        for i in range(length):
            uid = self.data[i][0]
            iid = self.neg_data[i]
            while (uid, iid) in tmp_trainMat:
                iid = np.random.randint(low=0, high=self.num_item)
            self.neg_data[i] = iid

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        user = self.data[idx][0]
        item_i = self.data[idx][1]
        if self.is_training:
            item_j = self.neg_data[idx]
            return user, item_i, item_j
        else:
            return user, item_i
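For orientation, a minimal usage sketch of BPRData; the synthetic pairs and matrix below are illustrative, not part of this commit:

import numpy as np
import scipy.sparse as sp
import torch.utils.data as data

# Tiny synthetic example: 4 users, 6 items, a few observed interactions.
train_pairs = [(0, 1), (1, 2), (2, 5), (3, 0)]
train_mat = sp.dok_matrix((4, 6), dtype=np.int32)
for u, i in train_pairs:
    train_mat[u, i] = 1

dataset = BPRData(train_pairs, num_item=6, train_mat=train_mat, num_ng=1, is_training=True)
loader = data.DataLoader(dataset, batch_size=2, shuffle=True)

for epoch in range(2):
    dataset.ng_sample()                   # redraw negatives every epoch
    for user, item_i, item_j in loader:
        pass                              # compute the BPR loss on the triples here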
@@ -0,0 +1,67 @@
import torch as t
from torch import nn
from torch.nn import init
import torch.nn.functional as F


class MODEL(nn.Module):
    def __init__(self, args, userNum, itemNum, hide_dim, layer=[16, 16]):
        super(MODEL, self).__init__()
        self.args = args
        self.userNum = userNum
        self.itemNum = itemNum
        self.hide_dim = hide_dim
        self.layer = [hide_dim] + layer
        self.embedding_dict = self.init_weight(userNum, itemNum, hide_dim)
        # GCN activation is LeakyReLU
        slope = self.args.slope
        self.act = t.nn.LeakyReLU(negative_slope=slope)
        self.layers = nn.ModuleList()
        for i in range(len(self.layer) - 1):
            self.layers.append(GCNLayer(self.layer[i], self.layer[i + 1], weight=True, activation=self.act))

    def init_weight(self, userNum, itemNum, hide_dim):
        initializer = nn.init.xavier_uniform_
        embedding_dict = nn.ParameterDict({
            'user_emb': nn.Parameter(initializer(t.empty(userNum, hide_dim))),
            'item_emb': nn.Parameter(initializer(t.empty(itemNum, hide_dim))),
        })
        return embedding_dict

    def forward(self, adj):
        all_user_embeddings = [self.embedding_dict['user_emb']]
        all_item_embeddings = [self.embedding_dict['item_emb']]
        if len(self.layers) == 0:
            return self.embedding_dict['user_emb'], self.embedding_dict['item_emb']

        for i, layer in enumerate(self.layers):
            if i == 0:
                embeddings = layer(adj, self.embedding_dict['user_emb'], self.embedding_dict['item_emb'])
            else:
                embeddings = layer(adj, embeddings[: self.userNum], embeddings[self.userNum:])

            norm_embeddings = F.normalize(embeddings, p=2, dim=1)
            all_user_embeddings += [norm_embeddings[: self.userNum]]
            all_item_embeddings += [norm_embeddings[self.userNum:]]

        # Concatenate the embeddings from every propagation layer.
        user_embedding = t.cat(all_user_embeddings, 1)
        item_embedding = t.cat(all_item_embeddings, 1)
        return user_embedding, item_embedding


class GCNLayer(nn.Module):
    def __init__(self, in_dim, out_dim, weight=True, activation=None):
        super(GCNLayer, self).__init__()
        self.weight = weight
        if self.weight:
            self.u_w = nn.Parameter(t.Tensor(in_dim, out_dim))
            self.v_w = nn.Parameter(t.Tensor(in_dim, out_dim))
            init.xavier_uniform_(self.u_w)
            init.xavier_uniform_(self.v_w)
        self.act = activation

    def forward(self, adj, user_feat, item_feat):
        # Project user/item features, stack them, then propagate over the
        # (userNum + itemNum) x (userNum + itemNum) sparse adjacency.
        user_feat = t.mm(user_feat, self.u_w)
        item_feat = t.mm(item_feat, self.v_w)
        feat = t.cat([user_feat, item_feat], dim=0)
        feat = self.act(t.spmm(adj, feat))
        return feat
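A minimal instantiation sketch for MODEL; the args namespace, sizes, and random adjacency below are stand-ins (the real adjacency would come from the helpers in the utils file further down):

import argparse
import torch as t

# args only needs the .slope attribute read in __init__.
args = argparse.Namespace(slope=0.2)
userNum, itemNum, hide_dim = 1000, 2000, 32

model = MODEL(args, userNum, itemNum, hide_dim, layer=[16, 16])

# adj must be a square sparse tensor over userNum + itemNum nodes; a random
# one stands in here for the normalized user-item adjacency.
n = userNum + itemNum
idx = t.randint(0, n, (2, 5000))
val = t.ones(5000)
adj = t.sparse_coo_tensor(idx, val, (n, n)).coalesce()

user_emb, item_emb = model(adj)
print(user_emb.shape)  # (1000, 64): hide_dim + 16 + 16 from layer concatenation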
@@ -0,0 +1,44 @@
import datetime

logmsg = ''
timemark = dict()
saveDefault = False


def log(msg, save=None, oneline=False):
    global logmsg
    time = datetime.datetime.now()
    tem = '%s: %s' % (time, msg)
    if save is not None:
        if save:
            logmsg += tem + '\n'
    elif saveDefault:
        logmsg += tem + '\n'
    if oneline:
        print(tem, end='\r')
    else:
        print(tem)


def marktime(marker):
    timemark[marker] = datetime.datetime.now()


def SpentTime(marker):
    if marker not in timemark:
        msg = 'LOGGER ERROR, marker %s not found' % marker
        tem = '%s: %s' % (datetime.datetime.now(), msg)
        print(tem)
        return False
    return datetime.datetime.now() - timemark[marker]


def SpentTooLong(marker, day=0, hour=0, minute=0, second=0):
    if marker not in timemark:
        msg = 'LOGGER ERROR, marker %s not found' % marker
        tem = '%s: %s' % (datetime.datetime.now(), msg)
        print(tem)
        return False
    return datetime.datetime.now() - timemark[marker] >= datetime.timedelta(days=day, hours=hour, minutes=minute, seconds=second)


if __name__ == '__main__':
    log('')
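Usage sketch; the module path ToolScripts.TimeLogger follows the import in the utils file below:

from ToolScripts.TimeLogger import log, marktime, SpentTime, SpentTooLong

marktime('epoch')                        # start a named timer
log('training started', save=True)       # printed and appended to logmsg
# ... work ...
log('epoch took %s' % SpentTime('epoch'))
if SpentTooLong('epoch', hour=1):
    log('epoch exceeded the one-hour budget')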
@@ -0,0 +1,112 @@
import pickle as pk
from ToolScripts.TimeLogger import log
import torch as t
import scipy.sparse as sp
import numpy as np
import os
import networkx as nx


def mkdir(dataset):
    # Create History/<dataset> and Model/<dataset> under the working directory.
    DIR = os.path.join(os.getcwd(), "History", dataset)
    if not os.path.exists(DIR):
        os.makedirs(DIR)
    DIR = os.path.join(os.getcwd(), "Model", dataset)
    if not os.path.exists(DIR):
        os.makedirs(DIR)


def matDropOut(mat, rate):
    # Keep a random rate-fraction of the nonzeros, then add self-loops.
    assert rate < 1.0
    log("mat nnz = %d" % (mat.nnz))
    row_idx, col_idx = mat.nonzero()
    nums = int(mat.nnz * rate)
    idx = np.random.permutation(row_idx.shape[0])[: nums]
    res = sp.csr_matrix((np.ones_like(row_idx[idx]), (row_idx[idx], col_idx[idx])), shape=mat.shape)
    res = (res + sp.eye(mat.shape[0]) != 0) * 1
    assert res.max() == 1
    log("mat nnz after dropout = %d" % (res.nnz))
    return res


def matExpand(uuMat, rate=0.001):
    # Add random edges to uuMat with the given density, row by row.
    log("expand rate = %.4f" % (rate))
    row, col = uuMat.shape
    for i in range(row):
        tmpMat = (sp.random(1, col, density=rate, format='csr') != 0) * 1
        if i == 0:
            res = tmpMat
        else:
            res = sp.vstack((res, tmpMat))
    res2 = res + uuMat
    res2 = (res2 != 0) * 1
    log("expand count = %d" % (res2.nnz - uuMat.nnz))
    return res2  # the expanded matrix: original edges plus random additions


def buildSubGraph(mat, subNode):
    # Partition the graph into connected components and build a
    # (component x node) membership matrix; node_list collects the nodes
    # of every component larger than subNode.
    node_num = mat.shape[0]
    graph = nx.Graph(mat)
    subGraphList = list(nx.connected_components(graph))
    subGraphCount = len(subGraphList)
    node_subGraph = [-1 for i in range(node_num)]
    adjMat = sp.dok_matrix((subGraphCount, node_num), dtype=int)
    node_list = []
    for i in range(len(subGraphList)):
        subGraphID = i
        subGraph = subGraphList[i]
        if len(subGraph) > subNode:
            node_list += list(subGraph)
        for node_id in subGraph:
            assert node_subGraph[node_id] == -1
            node_subGraph[node_id] = subGraphID
            adjMat[subGraphID, node_id] = 1
    node_subGraph = np.array(node_subGraph)
    assert np.sum(node_subGraph == -1) == 0
    adjMat = adjMat.tocsr()
    return subGraphList, node_subGraph, adjMat, node_list


def loadData(datasetStr):
    DIR = os.path.join(os.getcwd(), "dataset", datasetStr)
    log(DIR)
    with open(DIR + '/train.pkl', 'rb') as fs:
        trainMat = pk.load(fs)
    with open(DIR + '/test_data.pkl', 'rb') as fs:
        testData = pk.load(fs)
    with open(DIR + '/valid_data.pkl', 'rb') as fs:
        validData = pk.load(fs)
    return (trainMat, testData, validData)


def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse tensor."""
    if not sp.isspmatrix_coo(sparse_mx):
        sparse_mx = sparse_mx.tocoo().astype(np.float32)
    indices = t.from_numpy(np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = t.from_numpy(sparse_mx.data)
    shape = t.Size(sparse_mx.shape)
    return t.sparse.FloatTensor(indices, values, shape)


def normalize_adj(adj):
    """Symmetrically normalize adjacency matrix: D^-0.5 * A * D^-0.5."""
    adj = sp.coo_matrix(adj)
    rowsum = np.array(adj.sum(1))
    d_inv_sqrt = np.power(rowsum, -0.5).flatten()
    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.
    d_mat_inv_sqrt = sp.diags(d_inv_sqrt)
    return adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).tocsr()


def generate_sp_ont_hot(num):
    # Sparse one-hot (identity) matrix as a torch sparse tensor.
    mat = sp.eye(num)
    ret = sparse_mx_to_torch_sparse_tensor(mat)
    return ret


def load(path):
    with open(path, 'rb') as fs:
        data = pk.load(fs)
    return data
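A sketch of how these helpers could chain together to feed MODEL.forward; the square bipartite layout is an assumption about intended use, not shown in this commit:

import scipy.sparse as sp

# Synthetic stand-in for the pickled train matrix loadData() would return.
userNum, itemNum = 4, 6
trainMat = (sp.random(userNum, itemNum, density=0.3, format='csr') != 0) * 1

# Assemble the (userNum + itemNum) square bipartite adjacency, add
# self-loops, normalize symmetrically, and convert it for torch.
uu = sp.csr_matrix((userNum, userNum))
ii = sp.csr_matrix((itemNum, itemNum))
adj_full = sp.vstack([sp.hstack([uu, trainMat]),
                      sp.hstack([trainMat.transpose(), ii])])
adj_norm = normalize_adj(adj_full + sp.eye(adj_full.shape[0]))
adj = sparse_mx_to_torch_sparse_tensor(adj_norm)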
3 binary files not shown.
@@ -0,0 +1,15 @@
import numpy as np


def hit(gt_item, pred_items):
    # Hit ratio: 1 if the ground-truth item appears in the top-k list.
    if gt_item in pred_items:
        return 1
    return 0


def ndcg(gt_item, pred_items):
    # NDCG for a single ground-truth item: 1 / log2(rank + 2), where rank
    # is the 0-based position of the item in the top-k list.
    if gt_item in pred_items:
        index = pred_items.index(gt_item)
        return np.reciprocal(np.log2(index + 2))
    return 0
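A quick worked example of the two metrics, with a hypothetical top-10 recommendation list for one user:

pred_items = [42, 7, 19, 3, 88, 5, 61, 23, 14, 9]
gt_item = 19

print(hit(gt_item, pred_items))   # 1: the held-out item was recommended
print(ndcg(gt_item, pred_items))  # 1 / log2(2 + 2) = 0.5, since it ranks third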