-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodel_train.py
66 lines (52 loc) · 2.14 KB
/
model_train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# ---- Hyperparameters ----
# Number of episodes (tasks) drawn for training.
N_TRAINING_EPISODES = 80_000
# Number of tasks reserved for validation.
N_VALIDATION_TASKS = 100
# ---- Episodic sampler for the training set ----
# Attach a ``get_labels`` callable that returns element 1 of every dataset
# instance (presumably what TaskSampler reads -- confirm against its
# implementation).
train_dataset.get_labels = lambda: [sample[1] for sample in train_dataset]

train_sampler = TaskSampler(
    train_dataset,
    n_way=N_WAY,
    n_shot=N_SHOT,
    n_query=N_QUERY,
    n_tasks=N_TRAINING_EPISODES,
)
# ---- Training data loader ----
# Batches are drawn by ``train_sampler`` and assembled by its
# ``episodic_collate_fn``; loading uses 12 worker processes and pinned memory.
train_loader = DataLoader(
    train_dataset,
    batch_sampler=train_sampler,
    collate_fn=train_sampler.episodic_collate_fn,
    num_workers=12,
    pin_memory=True,
)
# ---- Loss and optimizer ----
# Cross-entropy is the first of the two loss terms combined in ``fit``.
criterion1 = nn.CrossEntropyLoss()
# Adam over all model parameters with a learning rate of 1e-3.
optimizer = optim.Adam(model.parameters(), lr=1e-3)
# ---- Single-episode optimisation step ----
def fit(
    support_images: torch.Tensor,
    support_labels: torch.Tensor,
    query_images: torch.Tensor,
    query_labels: torch.Tensor,
) -> float:
    """Run one optimisation step on a single episode and return its loss.

    All four tensors are moved to the CUDA device, the module-level ``model``
    is called on the support set and the query images, and the resulting
    scores are scored against ``query_labels``. The total loss is
    ``criterion1 + 0.5 * Hesimloss``; it is back-propagated and the
    module-level ``optimizer`` is stepped once.

    Args:
        support_images: Support-set images of the episode.
        support_labels: Labels of the support-set images.
        query_images: Query images to classify.
        query_labels: Ground-truth labels of the query images.

    Returns:
        The combined loss of this step as a Python float.
    """
    optimizer.zero_grad()

    classification_scores = model(
        support_images.cuda(), support_labels.cuda(), query_images.cuda()
    )

    # Transfer the query labels once and reuse them for both loss terms,
    # instead of issuing a separate host-to-device copy per term.
    query_labels_gpu = query_labels.cuda()
    loss1 = criterion1(classification_scores, query_labels_gpu)
    loss2 = Hesimloss(classification_scores, query_labels_gpu)

    # The second loss term contributes with a fixed weight of 0.5.
    loss = loss1 + 0.5 * loss2
    loss.backward()
    optimizer.step()
    return loss.item()
# ---- Training loop ----
# The progress-bar loss is refreshed every `log_update_frequency` episodes and
# is computed by `sliding_average` with that same value as its second argument.
log_update_frequency = 10
# Loss value returned by `fit` for every episode, in order.
all_loss = []

model.train()
with tqdm(enumerate(train_loader), total=len(train_loader)) as tqdm_train:
    for episode_index, episode in tqdm_train:
        # The fifth element of each episode is not used here.
        support_images, support_labels, query_images, query_labels, _ = episode

        loss_value = fit(
            support_images.float(),
            support_labels,
            query_images.float(),
            query_labels,
        )
        all_loss.append(loss_value)

        # Only touch the progress bar on logging episodes.
        if episode_index % log_update_frequency != 0:
            continue
        tqdm_train.set_postfix(
            loss=sliding_average(all_loss, log_update_frequency)
        )