# A partial implementation of https://arxiv.org/abs/2109.02157

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler

# Note: this example requires the napkinXC library: https://napkinxc.readthedocs.io/
from napkinxc.datasets import load_dataset
from napkinxc.measures import precision_at_k

from tqdm import tqdm
import torchhd
from torchhd import embeddings, HRRTensor
import torchhd.tensors
from scipy.sparse import vstack, lil_matrix
import numpy as np


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using {} device".format(device))


DIMENSIONS = 400
NUMBER_OF_EPOCHS = 1
BATCH_SIZE = 1
DATASET_NAME = "eurlex-4k"  # tested on "eurlex-4k" and "Wiki10-31K"
FC_LAYER_SIZE = 512
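
# Note that DIMENSIONS is deliberately much smaller than the number of classes
# (~4k for eurlex-4k, ~31k for Wiki10-31K): the HRR model below predicts a
# single DIMENSIONS-sized vector instead of one logit per class, which is
# where its parameter savings come from.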


def sparse_batch_collate(batch: list):
    """
    Collate function that transforms a batch of scipy csr matrices into a
    PyTorch sparse COO tensor and stacks the dense label vectors.
    """
    data_batch, targets_batch = zip(*batch)

    data_batch = vstack(data_batch).tocoo()
    data_batch = torch.sparse_coo_tensor(
        np.array(data_batch.nonzero()), data_batch.data, data_batch.shape
    )

    targets_batch = torch.stack(targets_batch)

    return data_batch, targets_batch
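
# For reference: given a batch of B csr rows with F features, this collate
# returns a (B, F) sparse COO feature tensor and a dense (B, n_classes)
# multi-hot label tensor.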


class multilabel_dataset(Dataset):
    def __init__(self, x, y, n_classes) -> None:
        self.x = x
        self.y = y
        self.n_classes = n_classes

    # Define the length of the dataset.
    def __len__(self):
        return self.x.shape[0]

    # Return a single sample from the dataset.
    def __getitem__(self, idx):
        # Multi-hot encode the list of label indices for this sample.
        labels = torch.zeros(self.n_classes, dtype=torch.int64)
        labels[self.y[idx]] = 1
        return self.x[idx], labels


X_train, Y_train = load_dataset(DATASET_NAME, "train", verbose=True)
X_test, Y_test = load_dataset(DATASET_NAME, "test", verbose=True)


if DATASET_NAME == "Wiki10-31K":  # Because of this issue: https://github.com/mwydmuch/napkinXC/issues/18
    X_train = lil_matrix(X_train[:, :-1])

N_features = X_train.shape[1]
N_classes = max(max(classes) for classes in Y_train if classes != []) + 1

train_dataset = multilabel_dataset(X_train, Y_train, N_classes)
train_dataloader = DataLoader(train_dataset, BATCH_SIZE, collate_fn=sparse_batch_collate)
test_dataset = multilabel_dataset(X_test, Y_test, N_classes)
test_dataloader = DataLoader(test_dataset, collate_fn=sparse_batch_collate)


print("Training on \033[1m{}\033[0m. It has {} features, and {} classes."
      .format(DATASET_NAME, N_features, N_classes))


# Fully Connected model for the baseline comparison
class FC(nn.Module):
    def __init__(self, num_features, num_classes):
        super(FC, self).__init__()
        self.num_classes = num_classes
        self.num_features = num_features
        self.fc_layer_size = FC_LAYER_SIZE

        # Network Layers
        self.fc1 = nn.Linear(self.num_features, self.fc_layer_size)
        self.fc2 = nn.Linear(self.fc_layer_size, self.fc_layer_size)
        self.olayer = nn.Linear(self.fc_layer_size, self.num_classes)

    def forward(self, x):
        x = F.leaky_relu(self.fc1(x))
        x = F.leaky_relu(self.fc2(x))
        x = self.olayer(x)
        return x

    def pred(self, out, threshold=0.5):
        # Predict every class whose sigmoid score clears the threshold,
        # ranked by descending score.
        y = torch.sigmoid(out)
        v, i = y.sort(descending=True)
        ids = i[v >= threshold]
        ids = ids.tolist()
        return ids

    def loss(self, out, target):
        loss = nn.BCEWithLogitsLoss()(out, target.type(torch.float64))
        return loss
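

# A minimal sketch of the HRR algebra the model below relies on (illustrative
# comment only, not executed here): binding with a vector's exact inverse
# recovers the other factor, which is what pred() and loss() exploit.
#
#   a, b = torchhd.HRRTensor.random(2, DIMENSIONS)
#   recovered = a.exact_inverse().bind(a.bind(b))
#   recovered.cosine_similarity(b)  # close to 1
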
# Modified version of the FC model that returns an HRRTensor with
# dim << the FC model's output size, giving the model fewer parameters.
class FCHRR(nn.Module):
    def __init__(self, num_features, num_classes, dim):
        super(FCHRR, self).__init__()
        self.num_classes = num_classes
        self.num_features = num_features
        self.fc_layer_size = FC_LAYER_SIZE
        self.dim = dim

        # One random HRR vector per class, plus shared "negative" and
        # "positive" role vectors kept as buffers so they move to the right
        # device with the model but are never trained.
        self.classes_vec = embeddings.Random(num_classes, dim, vsa="HRR")
        n_vec, p_vec = torchhd.HRRTensor.random(2, dim)
        self.register_buffer("n_vec", n_vec)
        self.register_buffer("p_vec", p_vec)

        # Network Layers
        self.fc1 = nn.Linear(self.num_features, self.fc_layer_size)
        self.fc2 = nn.Linear(self.fc_layer_size, self.fc_layer_size)
        self.olayer = nn.Linear(self.fc_layer_size, dim)

    def forward(self, x):
        x = F.leaky_relu(self.fc1(x))
        x = F.leaky_relu(self.fc2(x))
        x = self.olayer(x)
        # View the output as an HRRTensor so HRR operations are available on it.
        return x.as_subclass(HRRTensor)

    def pred(self, out, threshold=0.1):
        # Unbind the output with the inverse of the "positive" role vector,
        # then rank all class vectors by cosine similarity; every class above
        # the threshold is predicted, in descending order of similarity.
        tmp_positive = self.p_vec.exact_inverse().bind(out)
        sims = tmp_positive.cosine_similarity(self.classes_vec.weight)

        v, i = sims.sort(descending=True)
        ids = i[v >= threshold]
        ids = ids.tolist()

        return ids

    def loss(self, out, target):
        loss = torch.tensor(0, dtype=torch.float32, device=device)

        # Unbind the output with the "positive" and "negative" role vectors.
        tmp_positives = self.p_vec.exact_inverse().bind(out)
        tmp_negatives = self.n_vec.exact_inverse().bind(out)
        for i in range(target.shape[0]):
            # HRR vectors of the classes active for this sample.
            cp = self.classes_vec.weight[target[i] == 1, :]

            # j_p pulls the p-unbound output toward each true class vector;
            # j_n pushes the n-unbound output away from their bundle.
            j_p = (1 - tmp_positives[i].cosine_similarity(cp)).sum()
            j_n = tmp_negatives[i].cosine_similarity(cp.multibundle())

            loss += j_p + j_n

        loss /= target.shape[0]

        return loss


hrr_model = FCHRR(N_features, N_classes, DIMENSIONS)
hrr_model = hrr_model.to(device)

baseline_model = FC(N_features, N_classes)
baseline_model = baseline_model.to(device)
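
# Quick sanity check (not part of the original script): because the HRR head
# projects to DIMENSIONS instead of N_classes outputs, it should have
# noticeably fewer parameters than the FC baseline.
print("HRR-FC parameters: {}, FC parameters: {}".format(
    sum(p.numel() for p in hrr_model.parameters()),
    sum(p.numel() for p in baseline_model.parameters())))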


for model_name, model in {"HRR-FC": hrr_model, "FC": baseline_model}.items():
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.7)
    model.train()
    for epoch in tqdm(range(1, NUMBER_OF_EPOCHS + 1), desc=f"{model_name} epochs", leave=False):
        for samples, labels in tqdm(train_dataloader, desc="Training", leave=False):
            samples = samples.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            out = model(samples)
            loss = model.loss(out, labels)
            loss.backward()
            optimizer.step()

        scheduler.step()

    Y_pred = []
    model.eval()
    with torch.no_grad():
        for data, target in tqdm(test_dataloader, desc="Validating", leave=False):
            data, target = data.to(device).float(), target.to(device)
            out = model(data)
            ids = model.pred(out)
            Y_pred.append(ids)

    # Calculating the P@1 metric
    p_at_1 = precision_at_k(Y_test, Y_pred, k=1)[0]
    print("Result of {} model ----> P@1 = {}".format(model_name, p_at_1))
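
# The same ranked predictions can be scored at other cutoffs if desired, e.g.
# precision_at_k(Y_test, Y_pred, k=5) returns P@1 through P@5.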