Commit

Update For New Versions
Jiacheng-WU committed Apr 5, 2021
1 parent c1a5314 commit 93f8217
Showing 7 changed files with 65 additions and 30 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -1,2 +1,4 @@
venv
.idea
aqp.pt
__pycache__
3 changes: 2 additions & 1 deletion compose.py
@@ -49,4 +49,5 @@ def compose(input_res, attr_num=10):
count_one = np.sum(convert_int_to_bool_list(i))
sign = (-1)**count_one
res = res + sign * input_res[i]
return res
# Due to floating-point precision error, res can sometimes be slightly negative; we fix it by clamping with max
return max(res, 0)
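
A minimal illustration of the precision issue the new clamp guards against (a standalone sketch, not code from this commit): inclusion-exclusion terms that should cancel to exactly zero can leave a tiny negative residue in floating point.

# Terms that mathematically cancel to 0 can leave a tiny negative
# residue in IEEE-754 doubles, which max(res, 0) clamps away.
res = 0.3 - 0.1 - 0.2   # mathematically 0
print(res)              # ~ -2.8e-17
print(max(res, 0))      # 0, as compose() now returns
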
1 change: 1 addition & 0 deletions globals.py
@@ -1,4 +1,5 @@
ATTR_NUM = 10
SHUFFLE_TIME = 10
MODEL_SAVE_PATH = "aqp.pt"

# Example train set: it can simply be a CSV with n rows and 11 columns if we have 10 attributes
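
A hedged sketch of how EG_TRAIN_SETS (consumed in train.py) might be built from such a CSV; the file name "train.csv" and the pandas loading step are assumptions, not part of this commit. Column 0 holds the target value; columns 1..10 hold the attribute values.

import pandas as pd

# Hypothetical loading step; "train.csv" is an assumed file name.
EG_TRAIN_SETS = pd.read_csv("train.csv", header=None).values  # shape (n, 11)
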
25 changes: 16 additions & 9 deletions net.py
@@ -4,31 +4,38 @@
import torch.optim as optim
import numpy as np
import math
from .shuffle import shuffle
from shuffle import shuffle


class AQPNet(nn.Module):

def __init__(self, attr_num):
def __init__(self, attr_num, shuffle_time):
super(AQPNet, self).__init__()
self.attr_num = attr_num
self.conv1 = nn.Conv2d(1, 8, 5, padding=2, padding_mode='circular')
self.conv2 = nn.Conv2d(8, 16, 3, padding=1, padding_mode='circular')
self.fc1 = nn.Linear(math.ceil(attr_num / 2) * 32, 128)
first_level_pad = 2
second_level_pad = 1
total_pad = first_level_pad * second_level_pad
assert attr_num % total_pad == 0
assert shuffle_time % total_pad == 0
self.conv1 = nn.Conv2d(1, 8, 5, padding=first_level_pad, padding_mode='circular')
self.conv2 = nn.Conv2d(8, 16, 3, padding=second_level_pad, padding_mode='circular')
# After the conv/pool stage, both spatial dims are reduced to 1/total_pad of the original size
# Thus, after flatten, we only have attr_num//total_pad * shuffle_time//total_pad * 16 values
self.fc1 = nn.Linear(attr_num//total_pad * shuffle_time//total_pad * 16, 128)
self.fc2 = nn.Linear(128, 1)
self.do1 = nn.Dropout(0.5)
self.do2 = nn.Dropout(0.5)

def forward(self, x):
x = self.conv1(x)
x = F.relu(x)
x = F.max_pool1d(x, 2, ceil_mode=True)
x = F.max_pool2d(x, 2, ceil_mode=True)
x = self.conv2(x)
x = F.relu(x)
x = torch.flatten(x)
# flatten must not touch dim 0, which is the batch dim
x = torch.flatten(x, start_dim=1)
x = self.fc1(x)
x = self.do1(x)
x = F.relu(x)
x = self.fc2(x)
x = self.do2(x)
return F.sigmoid(x)
return torch.sigmoid(x)
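
A quick shape sanity check for the new fc1 sizing (an illustrative sketch under the defaults ATTR_NUM = SHUFFLE_TIME = 10, not part of the commit): the single max-pool halves both spatial dims, so flatten yields 16 * 5 * 5 = 400 features per sample, matching attr_num//total_pad * shuffle_time//total_pad * 16.

import torch
from net import AQPNet

net = AQPNet(attr_num=10, shuffle_time=10)
net.eval()                    # disable dropout for the check
x = torch.rand(4, 1, 10, 10)  # [batch, channel, shuffle_time, attr_num]
print(net(x).shape)           # torch.Size([4, 1]), values in (0, 1)
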
33 changes: 23 additions & 10 deletions query.py
@@ -4,32 +4,45 @@
import torch.optim as optim
import torchvision
import numpy as np
from .net import AQPNet
from .shuffle import shuffle, shuffle_batch
from .compose import compose, decompose
from .globals import *
from net import AQPNet
from shuffle import shuffle, shuffle_batch
from compose import compose, decompose
from globals import *


def load_model():
net = AQPNet(ATTR_NUM*ATTR_NUM)
def load_model(device):
net = AQPNet(ATTR_NUM, SHUFFLE_TIME)
net.load_state_dict(torch.load(MODEL_SAVE_PATH))
device = torch.device("cuda")
model = net.to(device)
model.eval()
return model


def do_query(query):
model = load_model()
device = torch.device("cuda")
model = load_model(device)
output_queries = decompose(query, ATTR_NUM)
shuffle_output_queries = shuffle_batch(output_queries)
tensor_queries = torch.from_numpy(np.array(shuffle_output_queries))
# print(output_queries)
shuffle_output_queries = shuffle_batch(output_queries, ATTR_NUM, SHUFFLE_TIME)
tensor_queries = torch.from_numpy(np.array(shuffle_output_queries)).to(device=device, dtype=torch.float)
queries_size = list(tensor_queries.size())
queries_size.insert(1, 1)
tensor_queries = torch.reshape(tensor_queries, queries_size)
output_tensors = model(tensor_queries)
output_array = output_tensors.data.cpu().numpy()
output_array = np.reshape(output_array, output_array.size)
res = compose(output_array)
return res


def main():
# example query
query = np.array([[0.2, 0.3], [0.4, 0.5], [0.3, 0.9], [0.1, 0.7], [0.7, 1.0],
[0.0, 1.0], [0.1, 0.8], [0.7, 0.8], [0.2, 0.6], [0.4, 0.6]])
res = do_query(query)
print("res: ", res)
# res is the estimated fraction of qualifying rows relative to the whole table


if __name__ == '__main__':
main()
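
The returned fraction can be scaled into an approximate COUNT (a sketch reusing the example query above; total_rows is a hypothetical value, since the table cardinality is not stored in this repository).

import numpy as np
from query import do_query

query = np.array([[0.2, 0.3], [0.4, 0.5], [0.3, 0.9], [0.1, 0.7], [0.7, 1.0],
                  [0.0, 1.0], [0.1, 0.8], [0.7, 0.8], [0.2, 0.6], [0.4, 0.6]])
total_rows = 1_000_000      # hypothetical table cardinality
fraction = do_query(query)  # selectivity estimate in [0, 1]
print("estimated count:", fraction * total_rows)
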
4 changes: 2 additions & 2 deletions shuffle.py
@@ -1,5 +1,6 @@
import numpy as np


# We use the following idea to shuffle
# input = input[::2] + input[1::2]
# and repeat A = ceil(log2(attr_num)) times
@@ -8,7 +9,6 @@
# The reason for this shuffle is to make the connections more arbitrary,
# rather than relying only on the locality of neighboring attributes induced by the Conv layers
# OR we could create/investigate a more suitable network to fit, if possible

# input attr_data is like [0.2,0.4,..., 0.5]
def shuffle(attr_data, attr_num=10, shuffle_times=10):
assert attr_num == shuffle_times
@@ -25,4 +25,4 @@ def shuffle(attr_data, attr_num=10, shuffle_times=10):


def shuffle_batch(batch, attr_num=10, shuffle_times=10):
return [shuffle(attr_data) for attr_data in batch]
return [shuffle(attr_data, attr_num, shuffle_times) for attr_data in batch]
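
A minimal demonstration of the interleaving step described in the comment above (standalone sketch, not part of the commit).

import numpy as np

attrs = np.arange(10)                             # [0 1 2 3 4 5 6 7 8 9]
step = np.concatenate([attrs[::2], attrs[1::2]])  # [0 2 4 6 8 1 3 5 7 9]
print(step)
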
27 changes: 19 additions & 8 deletions train.py
@@ -5,9 +5,9 @@
import torchvision
import numpy as np
import pandas as pd
from .net import AQPNet
from .shuffle import shuffle
from .globals import *
from net import AQPNet
from shuffle import shuffle, shuffle_batch
from globals import *
# We assume the attribute values start from 0; otherwise we need to normalize them
# We have 10 attributes, per Professor Wang Ying

@@ -18,7 +18,14 @@ def train(model, device, data, target, optimizer, batch_size):
for i in range(len(target) // batch_size):
batch_data = data[i*batch_size:(i+1)*batch_size]
batch_target = target[i*batch_size:(i+1)*batch_size]
batch_data, batch_target = batch_data.to(device), batch_target.to(device)
batch_data = torch.from_numpy(batch_data).to(device=device, dtype=torch.float)
batch_target = torch.from_numpy(batch_target).to(device=device, dtype=torch.float)
# In fact, we treat the input as a collection of 2D grids, which is already a 3D tensor
# However, Conv2D requires input in the format [batch, channels, height, width];
# we only have one channel, so we add an extra dim here
batch_data_size = list(batch_data.size())
batch_data_size.insert(1, 1)
batch_data = torch.reshape(batch_data, batch_data_size)

optimizer.zero_grad()
output = model(batch_data)
loss = F.l1_loss(output, batch_target)
@@ -28,24 +28,35 @@ def train(model, device, data, target, optimizer, batch_size):
return loss_array


def process_train_set(train_sets):
def process_train_set(train_sets, attr_num, shuffle_time):
train_sets = np.array(train_sets)
targets = train_sets[:, 0:1].T[0]
datas = train_sets[:, 1:]
datas = np.array(shuffle_batch(datas))
datas = np.array(shuffle_batch(datas, attr_num, shuffle_time))
return datas, targets


def main():
device = torch.device("cuda")
net = AQPNet(ATTR_NUM*ATTR_NUM)
net = AQPNet(ATTR_NUM, SHUFFLE_TIME)
model = net.to(device)
optimizer = optim.Adadelta(model.parameters(), lr=0.1)
# We put our example train sets in global variables
# and we provide the format of the train sets there
data, target = process_train_set(EG_TRAIN_SETS)
data, target = process_train_set(EG_TRAIN_SETS, ATTR_NUM, SHUFFLE_TIME)
batch_size = min(len(target), 32)
for epoch in range(1, 100):
train(model, device, data, target, optimizer, batch_size)

torch.save(model.state_dict(), MODEL_SAVE_PATH)


if __name__ == '__main__':
main()
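
The reshape that adds the channel dim could equivalently use unsqueeze, the idiomatic PyTorch form (an illustrative sketch, not a change to the commit).

import torch

batch_data = torch.rand(32, 10, 10)   # [batch, shuffle_time, attr_num]
batch_data = batch_data.unsqueeze(1)  # [batch, 1, shuffle_time, attr_num]
print(batch_data.shape)               # torch.Size([32, 1, 10, 10])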
