Commit

upload by civic_xhc
xhcdream committed Dec 10, 2020
0 parents commit 906cf74
Showing 9 changed files with 595 additions and 0 deletions.
51 changes: 51 additions & 0 deletions BPRData.py
@@ -0,0 +1,51 @@
import numpy as np

import torch.utils.data as data


class BPRData(data.Dataset):
    def __init__(self, data,
                 num_item, train_mat=None, num_ng=0, is_training=None):
        """Note that the labels are only useful when training; they are
        added in the ng_sample() function."""
        super(BPRData, self).__init__()
        self.data = np.array(data)
        self.num_item = num_item
        self.train_mat = train_mat
        self.num_ng = num_ng
        self.is_training = is_training

    def ng_sample(self):
        assert self.is_training, 'no need to sample when testing'
        tmp_trainMat = self.train_mat.todok()
        length = self.data.shape[0]
        self.neg_data = np.random.randint(low=0, high=self.num_item, size=length)

        # Resample any negative item that collides with an observed interaction.
        for i in range(length):
            uid = self.data[i][0]
            iid = self.neg_data[i]
            if (uid, iid) in tmp_trainMat:
                while (uid, iid) in tmp_trainMat:
                    iid = np.random.randint(low=0, high=self.num_item)
                self.neg_data[i] = iid

    def __len__(self):
        return len(self.data)
        # return self.num_ng * len(self.data) if \
        #     self.is_training else len(self.data)

    def __getitem__(self, idx):
        user = self.data[idx][0]
        item_i = self.data[idx][1]
        if self.is_training:
            item_j = self.neg_data[idx]
            return user, item_i, item_j
        else:
            return user, item_i
        # neg_data = self.neg_data if self.is_training else ([-1] * len(self.data))
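
A minimal usage sketch (not part of the commit; `train_pairs`, `num_items`, and the DataLoader settings below are placeholder assumptions):

# Hypothetical usage: wrap BPRData in a DataLoader for BPR training.
import scipy.sparse as sp
import torch.utils.data as data

train_pairs = [(0, 1), (0, 2), (1, 0)]        # observed (user, item) pairs
num_items = 4
train_mat = sp.dok_matrix((2, num_items))     # users x items interaction matrix
for u, i in train_pairs:
    train_mat[u, i] = 1.0

dataset = BPRData(train_pairs, num_items, train_mat=train_mat, is_training=True)
loader = data.DataLoader(dataset, batch_size=2, shuffle=True)

dataset.ng_sample()                           # resample negatives once per epoch
for user, item_i, item_j in loader:
    pass                                      # feed the triple to a BPR loss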


67 changes: 67 additions & 0 deletions MyGCN.py
@@ -0,0 +1,67 @@
import torch as t
from torch import nn
from torch.nn import init
import torch.nn.functional as F


class MODEL(nn.Module):
    def __init__(self, args, userNum, itemNum, hide_dim, layer=[16, 16]):
        super(MODEL, self).__init__()
        self.args = args
        self.userNum = userNum
        self.itemNum = itemNum
        self.hide_dim = hide_dim
        self.layer = [hide_dim] + layer
        self.embedding_dict = self.init_weight(userNum, itemNum, hide_dim)
        # The GCN activation is LeakyReLU.
        slope = self.args.slope
        self.act = t.nn.LeakyReLU(negative_slope=slope)
        self.layers = nn.ModuleList()
        for i in range(0, len(self.layer) - 1):
            self.layers.append(GCNLayer(self.layer[i], self.layer[i + 1], weight=True, activation=self.act))

    def init_weight(self, userNum, itemNum, hide_dim):
        initializer = nn.init.xavier_uniform_
        embedding_dict = nn.ParameterDict({
            'user_emb': nn.Parameter(initializer(t.empty(userNum, hide_dim))),
            'item_emb': nn.Parameter(initializer(t.empty(itemNum, hide_dim))),
        })
        return embedding_dict

    def forward(self, adj):
        all_user_embeddings = [self.embedding_dict['user_emb']]
        all_item_embeddings = [self.embedding_dict['item_emb']]
        if len(self.layers) == 0:
            return self.embedding_dict['user_emb'], self.embedding_dict['item_emb']

        for i, layer in enumerate(self.layers):
            if i == 0:
                embeddings = layer(adj, self.embedding_dict['user_emb'], self.embedding_dict['item_emb'])
            else:
                embeddings = layer(adj, embeddings[: self.userNum], embeddings[self.userNum:])

            norm_embeddings = F.normalize(embeddings, p=2, dim=1)
            all_user_embeddings += [norm_embeddings[: self.userNum]]
            all_item_embeddings += [norm_embeddings[self.userNum:]]

        # Concatenate the layer-0 embeddings with every propagated layer.
        user_embedding = t.cat(all_user_embeddings, 1)
        item_embedding = t.cat(all_item_embeddings, 1)
        return user_embedding, item_embedding


class GCNLayer(nn.Module):
    def __init__(self, in_dim, out_dim, weight=True, activation=None):
        super(GCNLayer, self).__init__()
        self.weight = weight
        if self.weight:
            self.u_w = nn.Parameter(t.Tensor(in_dim, out_dim))
            self.v_w = nn.Parameter(t.Tensor(in_dim, out_dim))
            init.xavier_uniform_(self.u_w)
            init.xavier_uniform_(self.v_w)
        self.act = activation

    def forward(self, adj, user_feat, item_feat):
        # Only apply the linear transforms when this layer carries weights.
        if self.weight:
            user_feat = t.mm(user_feat, self.u_w)
            item_feat = t.mm(item_feat, self.v_w)
        feat = t.cat([user_feat, item_feat], dim=0)
        feat = self.act(t.spmm(adj, feat))
        return feat
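
A minimal usage sketch (not part of the commit; `args.slope`, the sizes, and the identity adjacency below are placeholder assumptions — in the repository the adjacency would come from the normalized user-item graph):

# Hypothetical usage: build the model and run one forward pass.
from types import SimpleNamespace
import torch as t

args = SimpleNamespace(slope=0.1)
userNum, itemNum, hide_dim = 5, 7, 16
model = MODEL(args, userNum, itemNum, hide_dim, layer=[16, 16])

n = userNum + itemNum
adj = t.eye(n).to_sparse()              # stand-in for the normalized (user+item) graph
user_emb, item_emb = model(adj)
print(user_emb.shape, item_emb.shape)   # torch.Size([5, 48]) torch.Size([7, 48])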
44 changes: 44 additions & 0 deletions ToolScripts/TimeLogger.py
@@ -0,0 +1,44 @@
import datetime

logmsg = ''
timemark = dict()
saveDefault = False


def log(msg, save=None, oneline=False):
    global logmsg
    global saveDefault
    time = datetime.datetime.now()
    tem = '%s: %s' % (time, msg)
    if save is not None:
        if save:
            logmsg += tem + '\n'
    elif saveDefault:
        logmsg += tem + '\n'
    if oneline:
        print(tem, end='\r')
    else:
        print(tem)


def marktime(marker):
    global timemark
    timemark[marker] = datetime.datetime.now()


def SpentTime(marker):
    global timemark
    if marker not in timemark:
        msg = 'LOGGER ERROR: marker %s not found' % marker
        tem = '%s: %s' % (datetime.datetime.now(), msg)
        print(tem)
        return False
    return datetime.datetime.now() - timemark[marker]


def SpentTooLong(marker, day=0, hour=0, minute=0, second=0):
    global timemark
    if marker not in timemark:
        msg = 'LOGGER ERROR: marker %s not found' % marker
        tem = '%s: %s' % (datetime.datetime.now(), msg)
        print(tem)
        return False
    return datetime.datetime.now() - timemark[marker] >= datetime.timedelta(days=day, hours=hour, minutes=minute, seconds=second)


if __name__ == '__main__':
    log('')
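
A minimal usage sketch of the helpers above (the marker name and threshold are arbitrary):

# Hypothetical usage of the timing helpers.
from ToolScripts.TimeLogger import log, marktime, SpentTime, SpentTooLong

marktime('epoch')
log('training one epoch...')
log('elapsed: %s' % SpentTime('epoch'))
if SpentTooLong('epoch', minute=30):
    log('epoch exceeded 30 minutes')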
112 changes: 112 additions & 0 deletions ToolScripts/utils.py
@@ -0,0 +1,112 @@
import pickle as pk
from ToolScripts.TimeLogger import log
import torch as t
import scipy.sparse as sp
import numpy as np
import os
import networkx as nx


def mkdir(dataset):
    DIR = os.path.join(os.getcwd(), "History", dataset)
    if not os.path.exists(DIR):
        os.makedirs(DIR)
    DIR = os.path.join(os.getcwd(), "Model", dataset)
    if not os.path.exists(DIR):
        os.makedirs(DIR)


def matDropOut(mat, rate):
    assert rate < 1.0
    log("mat nnz = %d" % (mat.nnz))
    row_idx, col_idx = mat.nonzero()
    nums = int(mat.nnz * rate)
    idx = np.random.permutation(row_idx.shape[0])[: nums]
    res = sp.csr_matrix((np.ones_like(row_idx[idx]), (row_idx[idx], col_idx[idx])), shape=mat.shape)
    # Add self-loops and binarize.
    res = (res + sp.eye(mat.shape[0]) != 0) * 1
    assert res.max() == 1
    log("mat nnz after dropout = %d" % (res.nnz))
    return res


def matExpand(uuMat, rate=0.001):
    log("expand rate = %.4f" % (rate))
    row, col = uuMat.shape
    for i in range(row):
        tmpMat = (sp.random(1, col, density=rate, format='csr') != 0) * 1
        if i == 0:
            res = tmpMat
        else:
            res = sp.vstack((res, tmpMat))
    res2 = res + uuMat
    res2 = (res2 != 0) * 1
    log("expand count = %d" % (res2.nnz - uuMat.nnz))
    return res2


def buildSubGraph(mat, subNode):
    node_num = mat.shape[0]
    graph = nx.Graph(mat)
    subGraphList = list(nx.connected_components(graph))
    subGraphCount = len(subGraphList)
    node_subGraph = [-1 for i in range(node_num)]
    adjMat = sp.dok_matrix((subGraphCount, node_num), dtype=int)
    node_list = []
    for i in range(len(subGraphList)):
        subGraphID = i
        subGraph = subGraphList[i]
        if len(subGraph) > subNode:
            node_list += list(subGraph)
        for node_id in subGraph:
            assert node_subGraph[node_id] == -1
            node_subGraph[node_id] = subGraphID
            adjMat[subGraphID, node_id] = 1
    node_subGraph = np.array(node_subGraph)
    assert np.sum(node_subGraph == -1) == 0
    adjMat = adjMat.tocsr()
    return subGraphList, node_subGraph, adjMat, node_list


def loadData(datasetStr):
    DIR = os.path.join(os.getcwd(), "dataset", datasetStr)
    log(DIR)
    with open(DIR + '/train.pkl', 'rb') as fs:
        trainMat = pk.load(fs)
    with open(DIR + '/test_data.pkl', 'rb') as fs:
        testData = pk.load(fs)
    with open(DIR + '/valid_data.pkl', 'rb') as fs:
        validData = pk.load(fs)
    return (trainMat, testData, validData)


def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse tensor."""
    if not sp.isspmatrix_coo(sparse_mx):
        sparse_mx = sparse_mx.tocoo().astype(np.float32)
    indices = t.from_numpy(
        np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = t.from_numpy(sparse_mx.data)
    shape = t.Size(sparse_mx.shape)
    return t.sparse.FloatTensor(indices, values, shape)


def normalize_adj(adj):
    """Symmetrically normalize an adjacency matrix: D^-1/2 * A * D^-1/2."""
    adj = sp.coo_matrix(adj)
    rowsum = np.array(adj.sum(1))
    d_inv_sqrt = np.power(rowsum, -0.5).flatten()
    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.
    d_mat_inv_sqrt = sp.diags(d_inv_sqrt)
    return adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).tocsr()


def generate_sp_ont_hot(num):
    # Sparse one-hot (identity) matrix as a torch sparse tensor.
    mat = sp.eye(num)
    ret = sparse_mx_to_torch_sparse_tensor(mat)
    return ret


def load(path):
    with open(path, 'rb') as fs:
        data = pk.load(fs)
    return data
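
A minimal sketch of how these helpers compose (not from the commit; the 5x7 random interaction matrix and the bipartite layout are placeholder assumptions):

# Hypothetical usage: assemble and normalize a bipartite user-item graph,
# then convert it to a torch sparse tensor for the GCN.
import scipy.sparse as sp

trainMat = sp.random(5, 7, density=0.3, format='csr')   # placeholder user x item matrix
zero_uu = sp.csr_matrix((5, 5))
zero_ii = sp.csr_matrix((7, 7))
adj = sp.vstack([sp.hstack([zero_uu, trainMat]),
                 sp.hstack([trainMat.T, zero_ii])])
adj = normalize_adj(adj + sp.eye(adj.shape[0]))          # self-loops, then D^-1/2 A D^-1/2
adj_t = sparse_mx_to_torch_sparse_tensor(adj)            # ready for MODEL.forward(adj_t)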

Binary file added dataset/Yelp/test_data.pkl
Binary file not shown.
Binary file added dataset/Yelp/train.pkl
Binary file not shown.
Binary file added dataset/Yelp/valid_data.pkl
Binary file not shown.
15 changes: 15 additions & 0 deletions evaluate.py
@@ -0,0 +1,15 @@
import numpy as np


def hit(gt_item, pred_items):
    if gt_item in pred_items:
        return 1
    return 0


def ndcg(gt_item, pred_items):
    if gt_item in pred_items:
        index = pred_items.index(gt_item)
        return np.reciprocal(np.log2(index + 2))
    return 0
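
A minimal sketch of the two metrics (the item ids are arbitrary):

# Hypothetical usage: score one held-out item against a ranked top-K list.
top_k = [42, 7, 13]       # ranked item ids from the model
gt = 7                    # ground-truth test item
print(hit(gt, top_k))     # 1 -> the item appears in the list
print(ndcg(gt, top_k))    # 1/log2(3) ~= 0.631 for rank index 1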
