Skip to content

Commit

Permalink
Remove trailing whitespace and add whitespace around operators
Browse files (browse the repository at this point in the history)
  • Loading branch information
Erin Wild committed Feb 19, 2019
1 parent 2f3bc09 commit 81a1e94
Show file tree
Hide file tree
Showing 11 changed files with 92 additions and 97 deletions.
22 changes: 11 additions & 11 deletions sompy/codebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ def generate_hex_lattice(n_rows, n_columns):
y_coord = []
for i in range(n_rows):
for j in range(n_columns):
x_coord.append(i*1.5)
y_coord.append(np.sqrt(2/3)*(2*j+(1+i)%2))
x_coord.append(i * 1.5)
y_coord.append(np.sqrt(2 / 3) * (2 * j + (1 + i) % 2))
coordinates = np.column_stack([x_coord, y_coord])
return coordinates

Expand All @@ -35,13 +35,13 @@ def __init__(self, mapsize, lattice='rect'):
elif 1 == len(mapsize):
_size = [1, mapsize[0]]
print('input was considered as the numbers of nodes')
print('map size is [{dlen},{dlen}]'.format(dlen=int(mapsize[0]/2)))
print('map size is [{dlen},{dlen}]'.format(dlen=int(mapsize[0] / 2)))
else:
raise InvalidMapsizeError(
"Mapsize is expected to be a 2 element list or a single int")

self.mapsize = _size
self.nnodes = mapsize[0]*mapsize[1]
self.nnodes = mapsize[0] * mapsize[1]
self.matrix = np.asarray(self.mapsize)
self.initialized = False

Expand All @@ -59,7 +59,7 @@ def random_initialization(self, data):
"""
mn = np.tile(np.min(data, axis=0), (self.nnodes, 1))
mx = np.tile(np.max(data, axis=0), (self.nnodes, 1))
self.matrix = mn + (mx-mn)*(np.random.rand(self.nnodes, data.shape[1]))
self.matrix = mn + (mx - mn) * (np.random.rand(self.nnodes, data.shape[1]))
self.initialized = True

@timeit()
Expand Down Expand Up @@ -108,8 +108,8 @@ def pca_linear_initialization(self, data):

mx = np.max(coord, axis=0)
mn = np.min(coord, axis=0)
coord = (coord - mn)/(mx-mn)
coord = (coord - .5)*2
coord = (coord - mn) / (mx - mn)
coord = (coord - .5) * 2
me = np.mean(data, 0)
data = (data - me)
tmp_matrix = np.tile(me, (self.nnodes, 1))
Expand All @@ -122,11 +122,11 @@ def pca_linear_initialization(self, data):
eigvec = pca.components_
eigval = pca.explained_variance_
norms = np.sqrt(np.einsum('ij,ij->i', eigvec, eigvec))
eigvec = ((eigvec.T/norms)*eigval).T
eigvec = ((eigvec.T / norms) * eigval).T

for j in range(self.nnodes):
for i in range(eigvec.shape[0]):
tmp_matrix[j, :] = tmp_matrix[j, :] + coord[j, i]*eigvec[i, :]
tmp_matrix[j, :] = tmp_matrix[j, :] + coord[j, i] * eigvec[i, :]

self.matrix = np.around(tmp_matrix, decimals=6)
self.initialized = True
Expand Down Expand Up @@ -169,7 +169,7 @@ def _rect_dist(self, node_ind):
dist = None

# bmu should be an integer between 0 to no_nodes
if 0 <= node_ind <= (rows*cols):
if 0 <= node_ind <= (rows * cols):
node_col = int(node_ind % cols)
node_row = int(node_ind / cols)
else:
Expand All @@ -179,7 +179,7 @@ def _rect_dist(self, node_ind):
if rows > 0 and cols > 0:
r = np.arange(0, rows, 1)[:, np.newaxis]
c = np.arange(0, cols, 1)
dist2 = (r-node_row)**2 + (c-node_col)**2
dist2 = (r - node_row)**2 + (c - node_col)**2

dist = dist2.ravel()
else:
Expand Down
4 changes: 2 additions & 2 deletions sompy/neighborhood.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class GaussianNeighborhood(object):

@staticmethod
def calculate(distance_matrix, radius, dim):
return np.exp(-1.0*distance_matrix/(2.0*radius**2)).reshape(dim, dim)
return np.exp(-1.0 * distance_matrix / (2.0 * radius**2)).reshape(dim, dim)

def __call__(self, *args, **kwargs):
return self.calculate(*args)
Expand All @@ -38,7 +38,7 @@ class BubbleNeighborhood(object):
def calculate(distance_matrix, radius, dim):
def l(a, b):
c = np.zeros(b.shape)
c[a-b >= 0] = 1
c[a - b >= 0] = 1
return c

return l(radius,
Expand Down
4 changes: 2 additions & 2 deletions sompy/normalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,12 @@ def _mean_and_standard_dev(self, data):
def normalize(self, data):
me, st = self._mean_and_standard_dev(data)
st[st == 0] = 1 # prevent: when sd = 0, normalized result = NaN
return (data-me)/st
return (data - me) / st

def normalize_by(self, raw_data, data):
me, st = self._mean_and_standard_dev(raw_data)
st[st == 0] = 1 # prevent: when sd = 0, normalized result = NaN
return (data-me)/st
return (data - me) / st

def denormalize_by(self, data_by, n_vect):
me, st = self._mean_and_standard_dev(data_by)
Expand Down
88 changes: 42 additions & 46 deletions sompy/sompy.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def component_names(self, compnames):
'size as the data dimension/features')

def build_component_names(self):
cc = ['Variable-' + str(i+1) for i in range(0, self._dim)]
cc = ['Variable-' + str(i + 1) for i in range(0, self._dim)]
return np.asarray(cc)[np.newaxis, :]

@property
Expand Down Expand Up @@ -243,9 +243,9 @@ def train(self,
self.codebook.pca_linear_initialization(self._data)

self.rough_train(njob=n_job, shared_memory=shared_memory, trainlen=train_rough_len,
radiusin=train_rough_radiusin, radiusfin=train_rough_radiusfin,trainlen_factor=train_len_factor,maxtrainlen=maxtrainlen)
radiusin=train_rough_radiusin, radiusfin=train_rough_radiusfin, trainlen_factor=train_len_factor, maxtrainlen=maxtrainlen)
self.finetune_train(njob=n_job, shared_memory=shared_memory, trainlen=train_finetune_len,
radiusin=train_finetune_radiusin, radiusfin=train_finetune_radiusfin,trainlen_factor=train_len_factor,maxtrainlen=maxtrainlen)
radiusin=train_finetune_radiusin, radiusfin=train_finetune_radiusfin, trainlen_factor=train_len_factor, maxtrainlen=maxtrainlen)
logging.debug(
" --------------------------------------------------------------")
logging.info(" Final quantization error: %f" % np.mean(self._bmu[1]))
Expand All @@ -255,55 +255,54 @@ def _calculate_ms_and_mpd(self):
max_s = max(self.codebook.mapsize[0], self.codebook.mapsize[1])

if mn == 1:
mpd = float(self.codebook.nnodes*10)/float(self._dlen)
mpd = float(self.codebook.nnodes * 10) / float(self._dlen)
else:
mpd = float(self.codebook.nnodes)/float(self._dlen)
ms = max_s/2.0 if mn == 1 else max_s
mpd = float(self.codebook.nnodes) / float(self._dlen)
ms = max_s / 2.0 if mn == 1 else max_s

return ms, mpd

def rough_train(self, njob=1, shared_memory=False, trainlen=None, radiusin=None, radiusfin=None,trainlen_factor=1,maxtrainlen=np.Inf):
def rough_train(self, njob=1, shared_memory=False, trainlen=None, radiusin=None, radiusfin=None, trainlen_factor=1, maxtrainlen=np.Inf):
logging.info(" Rough training...")

ms, mpd = self._calculate_ms_and_mpd()
#lbugnon: add maxtrainlen
trainlen = min(int(np.ceil(30*mpd)),maxtrainlen) if not trainlen else trainlen
trainlen = min(int(np.ceil(30 * mpd)), maxtrainlen) if not trainlen else trainlen
#print("maxtrainlen %d",maxtrainlen)
#lbugnon: add trainlen_factor
trainlen=int(trainlen*trainlen_factor)
trainlen = int(trainlen * trainlen_factor)

if self.initialization == 'random':
radiusin = max(1, np.ceil(ms/3.)) if not radiusin else radiusin
radiusfin = max(1, radiusin/6.) if not radiusfin else radiusfin
radiusin = max(1, np.ceil(ms / 3.)) if not radiusin else radiusin
radiusfin = max(1, radiusin / 6.) if not radiusfin else radiusfin

elif self.initialization == 'pca':
radiusin = max(1, np.ceil(ms/8.)) if not radiusin else radiusin
radiusfin = max(1, radiusin/4.) if not radiusfin else radiusfin
radiusin = max(1, np.ceil(ms / 8.)) if not radiusin else radiusin
radiusfin = max(1, radiusin / 4.) if not radiusfin else radiusfin

self._batchtrain(trainlen, radiusin, radiusfin, njob, shared_memory)

def finetune_train(self, njob=1, shared_memory=False, trainlen=None, radiusin=None, radiusfin=None,trainlen_factor=1,maxtrainlen=np.Inf):
def finetune_train(self, njob=1, shared_memory=False, trainlen=None, radiusin=None, radiusfin=None, trainlen_factor=1, maxtrainlen=np.Inf):
logging.info(" Finetune training...")

ms, mpd = self._calculate_ms_and_mpd()

#lbugnon: add maxtrainlen
if self.initialization == 'random':
trainlen = min(int(np.ceil(50*mpd)),maxtrainlen) if not trainlen else trainlen
radiusin = max(1, ms/12.) if not radiusin else radiusin # from radius fin in rough training
radiusfin = max(1, radiusin/25.) if not radiusfin else radiusfin
trainlen = min(int(np.ceil(50 * mpd)), maxtrainlen) if not trainlen else trainlen
radiusin = max(1, ms / 12.) if not radiusin else radiusin # from radius fin in rough training
radiusfin = max(1, radiusin / 25.) if not radiusfin else radiusfin

elif self.initialization == 'pca':
trainlen = min(int(np.ceil(40*mpd)),maxtrainlen) if not trainlen else trainlen
radiusin = max(1, np.ceil(ms/8.)/4) if not radiusin else radiusin
trainlen = min(int(np.ceil(40 * mpd)), maxtrainlen) if not trainlen else trainlen
radiusin = max(1, np.ceil(ms / 8.) / 4) if not radiusin else radiusin
radiusfin = 1 if not radiusfin else radiusfin # max(1, ms/128)

#print("maxtrainlen %d",maxtrainlen)

#lbugnon: add trainlen_factor
trainlen=int(trainlen_factor*trainlen)


trainlen = int(trainlen_factor * trainlen)

self._batchtrain(trainlen, radiusin, radiusfin, njob, shared_memory)

def _batchtrain(self, trainlen, radiusin, radiusfin, njob=1,
Expand Down Expand Up @@ -350,9 +349,9 @@ def _batchtrain(self, trainlen, radiusin, radiusfin, njob=1,
qerror)
if np.any(np.isnan(qerror)):
logging.info("nan quantization error, exit train\n")

#sys.exit("quantization error=nan, exit train")

bmu[1] = np.sqrt(bmu[1] + fixed_euclidean_x2)
self._bmu = bmu

Expand Down Expand Up @@ -380,7 +379,7 @@ def row_chunk(part):
return part * dlen // njb

def col_chunk(part):
return min((part+1)*dlen // njb, dlen)
return min((part + 1) * dlen // njb, dlen)

chunks = [input_matrix[row_chunk(i):col_chunk(i)] for i in range(njb)]
b = pool.map(lambda chk: chunk_bmu_finder(chk, self.codebook.matrix, y2, nth=nth), chunks)
Expand Down Expand Up @@ -465,7 +464,7 @@ def predict_by(self, data, target, k=5, wt='distance'):
data = self._normalizer.normalize_by(self.data_raw, data)
data = data[:, indX]

elif dimdata == dim-1:
elif dimdata == dim - 1:
data = self._normalizer.normalize_by(self.data_raw[:, indX], data)

predicted_values = clf.predict(data)
Expand All @@ -485,7 +484,7 @@ def predict(self, x_test, k=5, wt='distance'):
(more detail in KNeighborsRegressor docs)
:returns: predicted values for the input data
"""
target = self.data_raw.shape[1]-1
target = self.data_raw.shape[1] - 1
x_train = self.codebook.matrix[:, :target]
y_train = self.codebook.matrix[:, target]
clf = neighbors.KNeighborsRegressor(k, weights=wt)
Expand Down Expand Up @@ -528,7 +527,7 @@ def bmu_ind_to_xy(self, bmu_ind):
# bmu should be an integer between 0 to no_nodes
out = np.zeros((bmu_ind.shape[0], 3))
out[:, 2] = bmu_ind
out[:, 0] = rows-1-bmu_ind / cols
out[:, 0] = rows - 1 - bmu_ind / cols
out[:, 0] = bmu_ind / cols
out[:, 1] = bmu_ind % cols

Expand Down Expand Up @@ -571,14 +570,14 @@ def predict_probability(self, data, target, k=5):
data = self._normalizer.normalize_by(self.data_raw, data)
data = data[:, indx]

elif dimdata == dim-1:
elif dimdata == dim - 1:
data = self._normalizer.normalize_by(self.data_raw[:, indx], data)

weights, ind = clf.kneighbors(data, n_neighbors=k,
return_distance=True)
weights = 1./weights
weights = 1. / weights
sum_ = np.sum(weights, axis=1)
weights = weights/sum_[:, np.newaxis]
weights = weights / sum_[:, np.newaxis]
labels = np.sign(self.codebook.matrix[ind, target])
labels[labels >= 0] = 1

Expand Down Expand Up @@ -612,18 +611,18 @@ def node_activation(self, data, target=None, wt='distance'):
weights, ind = clf.kneighbors(data)

# Softmax function
weights = 1./weights
weights = 1. / weights

return weights, ind

def calculate_topographic_error(self):
bmus1 = self.find_bmu(self.data_raw, njb=1, nth=1)
bmus2 = self.find_bmu(self.data_raw, njb=1, nth=2)
topographic_error = None
if self.codebook.lattice=="rect":
if self.codebook.lattice == "rect":
bmus_gap = np.abs((self.bmu_ind_to_xy(np.array(bmus1[0]))[:, 0:2] - self.bmu_ind_to_xy(np.array(bmus2[0]))[:, 0:2]).sum(axis=1))
topographic_error = np.mean(bmus_gap != 1)
elif self.codebook.lattice=="hexa":
elif self.codebook.lattice == "hexa":
dist_matrix_1 = self.codebook.lattice_distances[bmus1[0].astype(int)].reshape(len(bmus1[0]), -1)
topographic_error = (np.array(
[distances[bmu2] for bmu2, distances in zip(bmus2[0].astype(int), dist_matrix_1)]) > 2).mean()
Expand Down Expand Up @@ -666,7 +665,7 @@ def calculate_map_size(self, lattice):
if lattice == "rect":
size1 = min(munits, round(np.sqrt(munits / ratio)))
else:
size1 = min(munits, round(np.sqrt(munits / ratio*np.sqrt(0.75))))
size1 = min(munits, round(np.sqrt(munits / ratio * np.sqrt(0.75))))

size2 = round(munits / size1)

Expand Down Expand Up @@ -699,19 +698,16 @@ def _chunk_based_bmu_find(input_matrix, codebook, y2, nth=1):
blen = min(50, dlen)
i0 = 0

while i0+1 <= dlen:
while i0 + 1 <= dlen:
low = i0
high = min(dlen, i0+blen)
i0 = i0+blen
ddata = input_matrix[low:high+1]
high = min(dlen, i0 + blen)
i0 = i0 + blen
ddata = input_matrix[low:high + 1]
d = np.dot(codebook, ddata.T)
d *= -2
d += y2.reshape(nnodes, 1)
bmu[low:high+1, 0] = np.argpartition(d, nth, axis=0)[nth-1]
bmu[low:high+1, 1] = np.partition(d, nth, axis=0)[nth-1]
bmu[low:high + 1, 0] = np.argpartition(d, nth, axis=0)[nth - 1]
bmu[low:high + 1, 1] = np.partition(d, nth, axis=0)[nth - 1]
del ddata

return bmu



6 changes: 3 additions & 3 deletions sompy/visualization/bmuhits.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@ def _set_labels(self, cents, ax, labels, onlyzeros, fontsize, hex=False):
c = cents[i] if hex else (cents[i, 1] + 0.5, cents[-(i + 1), 0] + 0.5)
ax.annotate(txt, c, va="center", ha="center", size=fontsize)

def show(self, som, anotate=True, onlyzeros=False, labelsize=7, cmap="jet", logaritmic = False):
def show(self, som, anotate=True, onlyzeros=False, labelsize=7, cmap="jet", logaritmic=False):
org_w = self.width
org_h = self.height
(self.width, self.height, indtoshow, no_row_in_plot, no_col_in_plot,
axis_num) = self._calculate_figure_params(som, 1, 1)
self.width /= (self.width/org_w) if self.width > self.height else (self.height/org_h)
self.height /= (self.width / org_w) if self.width > self.height else (self.height / org_h)
self.width /= (self.width / org_w) if self.width > self.height else (self.height / org_h)
self.height /= (self.width / org_w) if self.width > self.height else (self.height / org_h)

counts = Counter(som._bmu[0])
counts = [counts.get(x, 0) for x in range(som.codebook.mapsize[0] * som.codebook.mapsize[1])]
Expand Down
16 changes: 8 additions & 8 deletions sompy/visualization/dotmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,17 @@
class DotMapView(MatplotView):

def init_figure(self, dim, cols):
no_row_in_plot = dim/cols + 1
no_row_in_plot = dim / cols + 1
no_col_in_plot = dim if no_row_in_plot <= 1 else cols
h = .1
w = .1
self.width = no_col_in_plot*2.5*(1+w)
self.height = no_row_in_plot*2.5*(1+h)
self.width = no_col_in_plot * 2.5 * (1 + w)
self.height = no_row_in_plot * 2.5 * (1 + h)
self.prepare()

def plot(self, data, coords, msz0, msz1, colormap, dlen, dim, rows, cols):
for i in range(dim):
plt.subplot(rows, cols, i+1)
plt.subplot(rows, cols, i + 1)

# This uses the colors uniquely for each record, while in normal
# views, it is based on the values within each dimensions. This is
Expand All @@ -28,7 +28,7 @@ def plot(self, data, coords, msz0, msz1, colormap, dlen, dim, rows, cols):

for j in range(dlen):
plt.scatter(coords[j, 1],
msz0-1-coords[j, 0],
msz0 - 1 - coords[j, 0],
c=data[j, i],
vmax=mx[j], vmin=mn[j],
s=90,
Expand All @@ -38,8 +38,8 @@ def plot(self, data, coords, msz0, msz1, colormap, dlen, dim, rows, cols):
alpha=1)

eps = .0075
plt.xlim(0-eps, msz1-1+eps)
plt.ylim(0-eps, msz0-1+eps)
plt.xlim(0 - eps, msz1 - 1 + eps)
plt.ylim(0 - eps, msz0 - 1 + eps)
plt.xticks([])
plt.yticks([])

Expand All @@ -50,7 +50,7 @@ def show(self, som, which_dim='all', colormap=None, cols=None):
msz0, msz1 = som.codebook.mapsize
coords = som.bmu_ind_to_xy(som.project_data(data))[:, :2]
cols = cols if cols else 8 # 8 is arbitrary
rows = data.shape[1]/cols+1
rows = data.shape[1] / cols + 1

if which_dim == 'all':
dim = data.shape[0]
Expand Down
Loading

0 comments on commit 81a1e94

Please sign in to comment.