Commit

upload by civic_xhc
xhcdream committed Dec 10, 2020
0 parents commit 906cf74
Showing 9 changed files with 595 additions and 0 deletions.
51 changes: 51 additions & 0 deletions BPRData.py
@@ -0,0 +1,51 @@
import numpy as np

import torch.utils.data as data


class BPRData(data.Dataset):
    def __init__(self, data,
                 num_item, train_mat=None, num_ng=0, is_training=None):
        """Note that the labels are only useful when training; they are
        added in the ng_sample() function."""
        super(BPRData, self).__init__()
        self.data = np.array(data)
        self.num_item = num_item
        self.train_mat = train_mat
        self.num_ng = num_ng
        self.is_training = is_training

    def ng_sample(self):
        assert self.is_training, 'no need to sample when testing'
        tmp_trainMat = self.train_mat.todok()
        length = self.data.shape[0]
        self.neg_data = np.random.randint(low=0, high=self.num_item, size=length)

        # Resample any negative item that collides with an observed interaction.
        for i in range(length):
            uid = self.data[i][0]
            iid = self.neg_data[i]
            if (uid, iid) in tmp_trainMat:
                while (uid, iid) in tmp_trainMat:
                    iid = np.random.randint(low=0, high=self.num_item)
                self.neg_data[i] = iid

    def __len__(self):
        return len(self.data)
        # return self.num_ng * len(self.data) if \
        #     self.is_training else len(self.data)

    def __getitem__(self, idx):
        user = self.data[idx][0]
        item_i = self.data[idx][1]
        if self.is_training:
            item_j = self.neg_data[idx]
            return user, item_i, item_j
        else:
            return user, item_i
        # neg_data = self.neg_data if self.is_training else ([-1] * len(self.data))
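
A minimal usage sketch (not part of the commit; `train_pairs`, `num_items`, and the DataLoader settings below are placeholder assumptions):

# Hypothetical usage: wrap BPRData in a DataLoader for BPR training.
import scipy.sparse as sp
import torch.utils.data as data

train_pairs = [(0, 1), (0, 2), (1, 0)]        # observed (user, item) pairs
num_items = 4
train_mat = sp.dok_matrix((2, num_items))     # users x items interaction matrix
for u, i in train_pairs:
    train_mat[u, i] = 1.0

dataset = BPRData(train_pairs, num_items, train_mat=train_mat, is_training=True)
loader = data.DataLoader(dataset, batch_size=2, shuffle=True)

dataset.ng_sample()                           # resample negatives once per epoch
for user, item_i, item_j in loader:
    pass                                      # feed the triple to a BPR loss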


67 changes: 67 additions & 0 deletions MyGCN.py
@@ -0,0 +1,67 @@
import torch as t
from torch import nn
from torch.nn import init
import torch.nn.functional as F


class MODEL(nn.Module):
    def __init__(self, args, userNum, itemNum, hide_dim, layer=[16, 16]):
        super(MODEL, self).__init__()
        self.args = args
        self.userNum = userNum
        self.itemNum = itemNum
        self.hide_dim = hide_dim
        self.layer = [hide_dim] + layer
        self.embedding_dict = self.init_weight(userNum, itemNum, hide_dim)
        # The GCN activation is LeakyReLU.
        slope = self.args.slope
        self.act = t.nn.LeakyReLU(negative_slope=slope)
        self.layers = nn.ModuleList()
        for i in range(0, len(self.layer) - 1):
            self.layers.append(GCNLayer(self.layer[i], self.layer[i + 1], weight=True, activation=self.act))

    def init_weight(self, userNum, itemNum, hide_dim):
        initializer = nn.init.xavier_uniform_
        embedding_dict = nn.ParameterDict({
            'user_emb': nn.Parameter(initializer(t.empty(userNum, hide_dim))),
            'item_emb': nn.Parameter(initializer(t.empty(itemNum, hide_dim))),
        })
        return embedding_dict

    def forward(self, adj):
        all_user_embeddings = [self.embedding_dict['user_emb']]
        all_item_embeddings = [self.embedding_dict['item_emb']]
        if len(self.layers) == 0:
            return self.embedding_dict['user_emb'], self.embedding_dict['item_emb']

        for i, layer in enumerate(self.layers):
            if i == 0:
                embeddings = layer(adj, self.embedding_dict['user_emb'], self.embedding_dict['item_emb'])
            else:
                embeddings = layer(adj, embeddings[: self.userNum], embeddings[self.userNum:])

            norm_embeddings = F.normalize(embeddings, p=2, dim=1)
            all_user_embeddings += [norm_embeddings[: self.userNum]]
            all_item_embeddings += [norm_embeddings[self.userNum:]]

        # Concatenate the layer-0 embeddings with every propagated layer.
        user_embedding = t.cat(all_user_embeddings, 1)
        item_embedding = t.cat(all_item_embeddings, 1)
        return user_embedding, item_embedding


class GCNLayer(nn.Module):
    def __init__(self, in_dim, out_dim, weight=True, activation=None):
        super(GCNLayer, self).__init__()
        self.weight = weight
        if self.weight:
            self.u_w = nn.Parameter(t.Tensor(in_dim, out_dim))
            self.v_w = nn.Parameter(t.Tensor(in_dim, out_dim))
            init.xavier_uniform_(self.u_w)
            init.xavier_uniform_(self.v_w)
        self.act = activation

    def forward(self, adj, user_feat, item_feat):
        # Only apply the linear transforms when this layer carries weights.
        if self.weight:
            user_feat = t.mm(user_feat, self.u_w)
            item_feat = t.mm(item_feat, self.v_w)
        feat = t.cat([user_feat, item_feat], dim=0)
        feat = self.act(t.spmm(adj, feat))
        return feat
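
A minimal usage sketch (not part of the commit; `args.slope`, the sizes, and the identity adjacency below are placeholder assumptions — in the repository the adjacency would come from the normalized user-item graph):

# Hypothetical usage: build the model and run one forward pass.
from types import SimpleNamespace
import torch as t

args = SimpleNamespace(slope=0.1)
userNum, itemNum, hide_dim = 5, 7, 16
model = MODEL(args, userNum, itemNum, hide_dim, layer=[16, 16])

n = userNum + itemNum
adj = t.eye(n).to_sparse()              # stand-in for the normalized (user+item) graph
user_emb, item_emb = model(adj)
print(user_emb.shape, item_emb.shape)   # torch.Size([5, 48]) torch.Size([7, 48])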
44 changes: 44 additions & 0 deletions ToolScripts/TimeLogger.py
@@ -0,0 +1,44 @@
import datetime

logmsg = ''
timemark = dict()
saveDefault = False


def log(msg, save=None, oneline=False):
    global logmsg
    global saveDefault
    time = datetime.datetime.now()
    tem = '%s: %s' % (time, msg)
    if save is not None:
        if save:
            logmsg += tem + '\n'
    elif saveDefault:
        logmsg += tem + '\n'
    if oneline:
        print(tem, end='\r')
    else:
        print(tem)


def marktime(marker):
    global timemark
    timemark[marker] = datetime.datetime.now()


def SpentTime(marker):
    global timemark
    if marker not in timemark:
        msg = 'LOGGER ERROR: marker %s not found' % marker
        tem = '%s: %s' % (datetime.datetime.now(), msg)
        print(tem)
        return False
    return datetime.datetime.now() - timemark[marker]


def SpentTooLong(marker, day=0, hour=0, minute=0, second=0):
    global timemark
    if marker not in timemark:
        msg = 'LOGGER ERROR: marker %s not found' % marker
        tem = '%s: %s' % (datetime.datetime.now(), msg)
        print(tem)
        return False
    return datetime.datetime.now() - timemark[marker] >= datetime.timedelta(days=day, hours=hour, minutes=minute, seconds=second)


if __name__ == '__main__':
    log('')
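
A minimal usage sketch of the helpers above (the marker name and threshold are arbitrary):

# Hypothetical usage of the timing helpers.
from ToolScripts.TimeLogger import log, marktime, SpentTime, SpentTooLong

marktime('epoch')
log('training one epoch...')
log('elapsed: %s' % SpentTime('epoch'))
if SpentTooLong('epoch', minute=30):
    log('epoch exceeded 30 minutes')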
112 changes: 112 additions & 0 deletions ToolScripts/utils.py
@@ -0,0 +1,112 @@
import pickle as pk
from ToolScripts.TimeLogger import log
import torch as t
import scipy.sparse as sp
import numpy as np
import os
import networkx as nx


def mkdir(dataset):
    DIR = os.path.join(os.getcwd(), "History", dataset)
    if not os.path.exists(DIR):
        os.makedirs(DIR)
    DIR = os.path.join(os.getcwd(), "Model", dataset)
    if not os.path.exists(DIR):
        os.makedirs(DIR)


def matDropOut(mat, rate):
    assert rate < 1.0
    log("mat nnz = %d" % (mat.nnz))
    row_idx, col_idx = mat.nonzero()
    nums = int(mat.nnz * rate)
    idx = np.random.permutation(row_idx.shape[0])[: nums]
    res = sp.csr_matrix((np.ones_like(row_idx[idx]), (row_idx[idx], col_idx[idx])), shape=mat.shape)
    # Add self-loops and binarize.
    res = (res + sp.eye(mat.shape[0]) != 0) * 1
    assert res.max() == 1
    log("mat nnz after dropout = %d" % (res.nnz))
    return res


def matExpand(uuMat, rate=0.001):
    log("expand rate = %.4f" % (rate))
    row, col = uuMat.shape
    for i in range(row):
        tmpMat = (sp.random(1, col, density=rate, format='csr') != 0) * 1
        if i == 0:
            res = tmpMat
        else:
            res = sp.vstack((res, tmpMat))
    res2 = res + uuMat
    res2 = (res2 != 0) * 1
    log("expand count = %d" % (res2.nnz - uuMat.nnz))
    return res2


def buildSubGraph(mat, subNode):
    node_num = mat.shape[0]
    graph = nx.Graph(mat)
    subGraphList = list(nx.connected_components(graph))
    subGraphCount = len(subGraphList)
    node_subGraph = [-1 for i in range(node_num)]
    adjMat = sp.dok_matrix((subGraphCount, node_num), dtype=int)
    node_list = []
    for i in range(len(subGraphList)):
        subGraphID = i
        subGraph = subGraphList[i]
        if len(subGraph) > subNode:
            node_list += list(subGraph)
        for node_id in subGraph:
            assert node_subGraph[node_id] == -1
            node_subGraph[node_id] = subGraphID
            adjMat[subGraphID, node_id] = 1
    node_subGraph = np.array(node_subGraph)
    assert np.sum(node_subGraph == -1) == 0
    adjMat = adjMat.tocsr()
    return subGraphList, node_subGraph, adjMat, node_list


def loadData(datasetStr):
    DIR = os.path.join(os.getcwd(), "dataset", datasetStr)
    log(DIR)
    with open(DIR + '/train.pkl', 'rb') as fs:
        trainMat = pk.load(fs)
    with open(DIR + '/test_data.pkl', 'rb') as fs:
        testData = pk.load(fs)
    with open(DIR + '/valid_data.pkl', 'rb') as fs:
        validData = pk.load(fs)
    return (trainMat, testData, validData)


def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse tensor."""
    if not sp.isspmatrix_coo(sparse_mx):
        sparse_mx = sparse_mx.tocoo().astype(np.float32)
    indices = t.from_numpy(
        np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = t.from_numpy(sparse_mx.data)
    shape = t.Size(sparse_mx.shape)
    return t.sparse.FloatTensor(indices, values, shape)


def normalize_adj(adj):
    """Symmetrically normalize an adjacency matrix: D^-1/2 * A * D^-1/2."""
    adj = sp.coo_matrix(adj)
    rowsum = np.array(adj.sum(1))
    d_inv_sqrt = np.power(rowsum, -0.5).flatten()
    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.
    d_mat_inv_sqrt = sp.diags(d_inv_sqrt)
    return adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).tocsr()


def generate_sp_ont_hot(num):
    # Sparse one-hot (identity) matrix as a torch sparse tensor.
    mat = sp.eye(num)
    ret = sparse_mx_to_torch_sparse_tensor(mat)
    return ret


def load(path):
    with open(path, 'rb') as fs:
        data = pk.load(fs)
    return data
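
A minimal sketch of how these helpers compose (not from the commit; the 5x7 random interaction matrix and the bipartite layout are placeholder assumptions):

# Hypothetical usage: assemble and normalize a bipartite user-item graph,
# then convert it to a torch sparse tensor for the GCN.
import scipy.sparse as sp

trainMat = sp.random(5, 7, density=0.3, format='csr')   # placeholder user x item matrix
zero_uu = sp.csr_matrix((5, 5))
zero_ii = sp.csr_matrix((7, 7))
adj = sp.vstack([sp.hstack([zero_uu, trainMat]),
                 sp.hstack([trainMat.T, zero_ii])])
adj = normalize_adj(adj + sp.eye(adj.shape[0]))          # self-loops, then D^-1/2 A D^-1/2
adj_t = sparse_mx_to_torch_sparse_tensor(adj)            # ready for MODEL.forward(adj_t)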

Binary file added dataset/Yelp/test_data.pkl
Binary file not shown.
Binary file added dataset/Yelp/train.pkl
Binary file not shown.
Binary file added dataset/Yelp/valid_data.pkl
Binary file not shown.
15 changes: 15 additions & 0 deletions evaluate.py
@@ -0,0 +1,15 @@
import numpy as np


def hit(gt_item, pred_items):
    if gt_item in pred_items:
        return 1
    return 0


def ndcg(gt_item, pred_items):
    if gt_item in pred_items:
        index = pred_items.index(gt_item)
        return np.reciprocal(np.log2(index + 2))
    return 0
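
A minimal sketch of the two metrics (the item ids are arbitrary):

# Hypothetical usage: score one held-out item against a ranked top-K list.
top_k = [42, 7, 13]       # ranked item ids from the model
gt = 7                    # ground-truth test item
print(hit(gt, top_k))     # 1 -> the item appears in the list
print(ndcg(gt, top_k))    # 1/log2(3) ~= 0.631 for rank index 1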
