From a66426e56f85c696b22cd44807c6699bfbf56302 Mon Sep 17 00:00:00 2001 From: ricardomourarpm <41452349+ricardomourarpm@users.noreply.github.com> Date: Wed, 27 Feb 2019 11:45:43 +0000 Subject: [PATCH] Add files via upload --- sompy/__init__.py | 5 +- sompy/codebook.py | 26 ++-- .../examples/AirFlights_hexagonal_grid.ipynb | 6 +- sompy/neighborhood.py | 7 +- sompy/normalization.py | 9 +- sompy/sompy.py | 139 +++++++++--------- sompy/visualization/bmuhits.py | 16 +- sompy/visualization/dotmap.py | 21 ++- sompy/visualization/histogram.py | 16 +- sompy/visualization/hitmap.py | 17 ++- sompy/visualization/mapview.py | 67 +++++---- sompy/visualization/plot_tools.py | 9 +- sompy/visualization/plot_tools2.py | 80 ++++++++++ sompy/visualization/umatrix.py | 30 ++-- 14 files changed, 261 insertions(+), 187 deletions(-) create mode 100644 sompy/visualization/plot_tools2.py diff --git a/sompy/__init__.py b/sompy/__init__.py index 6abcd49..3b1e899 100644 --- a/sompy/__init__.py +++ b/sompy/__init__.py @@ -1,6 +1,7 @@ -from logging.config import dictConfig +from logging.config import dictConfig import matplotlib + #matplotlib.use('Agg') # Use whatever backend is available dictConfig({ @@ -24,5 +25,7 @@ } }) + + from .sompy import SOMFactory from .visualization import * diff --git a/sompy/codebook.py b/sompy/codebook.py index 7c8b340..1195347 100644 --- a/sompy/codebook.py +++ b/sompy/codebook.py @@ -1,7 +1,8 @@ import numpy as np import scipy as sp -from sklearn.decomposition import PCA +from sklearn.decomposition import PCA +#from sklearn.decomposition import RandomizedPCA# (randomizedpca is deprecated) from .decorators import timeit @@ -12,14 +13,13 @@ class InvalidNodeIndexError(Exception): class InvalidMapsizeError(Exception): pass - def generate_hex_lattice(n_rows, n_columns): x_coord = [] y_coord = [] for i in range(n_rows): for j in range(n_columns): - x_coord.append(i * 1.5) - y_coord.append(np.sqrt(2 / 3) * (2 * j + (1 + i) % 2)) + x_coord.append(i*1.5) + y_coord.append(np.sqrt(2/3)*(2*j+(1+i)%2)) coordinates = np.column_stack([x_coord, y_coord]) return coordinates @@ -35,13 +35,13 @@ def __init__(self, mapsize, lattice='rect'): elif 1 == len(mapsize): _size = [1, mapsize[0]] print('input was considered as the numbers of nodes') - print('map size is [{dlen},{dlen}]'.format(dlen=int(mapsize[0] / 2))) + print('map size is [{dlen},{dlen}]'.format(dlen=int(mapsize[0]/2))) else: raise InvalidMapsizeError( "Mapsize is expected to be a 2 element list or a single int") self.mapsize = _size - self.nnodes = mapsize[0] * mapsize[1] + self.nnodes = mapsize[0]*mapsize[1] self.matrix = np.asarray(self.mapsize) self.initialized = False @@ -59,7 +59,7 @@ def random_initialization(self, data): """ mn = np.tile(np.min(data, axis=0), (self.nnodes, 1)) mx = np.tile(np.max(data, axis=0), (self.nnodes, 1)) - self.matrix = mn + (mx - mn) * (np.random.rand(self.nnodes, data.shape[1])) + self.matrix = mn + (mx-mn)*(np.random.rand(self.nnodes, data.shape[1])) self.initialized = True @timeit() @@ -108,8 +108,8 @@ def pca_linear_initialization(self, data): mx = np.max(coord, axis=0) mn = np.min(coord, axis=0) - coord = (coord - mn) / (mx - mn) - coord = (coord - .5) * 2 + coord = (coord - mn)/(mx-mn) + coord = (coord - .5)*2 me = np.mean(data, 0) data = (data - me) tmp_matrix = np.tile(me, (self.nnodes, 1)) @@ -122,11 +122,11 @@ def pca_linear_initialization(self, data): eigvec = pca.components_ eigval = pca.explained_variance_ norms = np.sqrt(np.einsum('ij,ij->i', eigvec, eigvec)) - eigvec = ((eigvec.T / norms) * 
eigval).T + eigvec = ((eigvec.T/norms)*eigval).T for j in range(self.nnodes): for i in range(eigvec.shape[0]): - tmp_matrix[j, :] = tmp_matrix[j, :] + coord[j, i] * eigvec[i, :] + tmp_matrix[j, :] = tmp_matrix[j, :] + coord[j, i]*eigvec[i, :] self.matrix = np.around(tmp_matrix, decimals=6) self.initialized = True @@ -169,7 +169,7 @@ def _rect_dist(self, node_ind): dist = None # bmu should be an integer between 0 to no_nodes - if 0 <= node_ind <= (rows * cols): + if 0 <= node_ind <= (rows*cols): node_col = int(node_ind % cols) node_row = int(node_ind / cols) else: @@ -179,7 +179,7 @@ def _rect_dist(self, node_ind): if rows > 0 and cols > 0: r = np.arange(0, rows, 1)[:, np.newaxis] c = np.arange(0, cols, 1) - dist2 = (r - node_row)**2 + (c - node_col)**2 + dist2 = (r-node_row)**2 + (c-node_col)**2 dist = dist2.ravel() else: diff --git a/sompy/examples/AirFlights_hexagonal_grid.ipynb b/sompy/examples/AirFlights_hexagonal_grid.ipynb index 64ef117..d652307 100644 --- a/sompy/examples/AirFlights_hexagonal_grid.ipynb +++ b/sompy/examples/AirFlights_hexagonal_grid.ipynb @@ -354,12 +354,10 @@ ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "raw", "metadata": {}, - "outputs": [], "source": [ - "%time\n", + "%%time\n", "# Train the model with different parameters. The more, the better. Each iteration is stored in disk for further study\n", "for i in range(1000):\n", " sm = SOMFactory().build(data, mapsize=[random.choice(list(range(15, 25))), \n", diff --git a/sompy/neighborhood.py b/sompy/neighborhood.py index fc35407..a4d2c9c 100644 --- a/sompy/neighborhood.py +++ b/sompy/neighborhood.py @@ -1,8 +1,7 @@ +import numpy as np import inspect import sys -import numpy as np - small = .000000000001 @@ -25,7 +24,7 @@ class GaussianNeighborhood(object): @staticmethod def calculate(distance_matrix, radius, dim): - return np.exp(-1.0 * distance_matrix / (2.0 * radius**2)).reshape(dim, dim) + return np.exp(-1.0*distance_matrix/(2.0*radius**2)).reshape(dim, dim) def __call__(self, *args, **kwargs): return self.calculate(*args) @@ -39,7 +38,7 @@ class BubbleNeighborhood(object): def calculate(distance_matrix, radius, dim): def l(a, b): c = np.zeros(b.shape) - c[a - b >= 0] = 1 + c[a-b >= 0] = 1 return c return l(radius, diff --git a/sompy/normalization.py b/sompy/normalization.py index 78f5ab9..cf833ca 100644 --- a/sompy/normalization.py +++ b/sompy/normalization.py @@ -1,7 +1,6 @@ -import inspect -import sys - import numpy as np +import sys +import inspect class NormalizerFactory(object): @@ -38,12 +37,12 @@ def _mean_and_standard_dev(self, data): def normalize(self, data): me, st = self._mean_and_standard_dev(data) st[st == 0] = 1 # prevent: when sd = 0, normalized result = NaN - return (data - me) / st + return (data-me)/st def normalize_by(self, raw_data, data): me, st = self._mean_and_standard_dev(raw_data) st[st == 0] = 1 # prevent: when sd = 0, normalized result = NaN - return (data - me) / st + return (data-me)/st def denormalize_by(self, data_by, n_vect): me, st = self._mean_and_standard_dev(data_by) diff --git a/sompy/sompy.py b/sompy/sompy.py index a350011..5a32675 100644 --- a/sompy/sompy.py +++ b/sompy/sompy.py @@ -8,21 +8,23 @@ # Contributor: Sebastian Packmann (sebastian.packmann@gmail.com) +import tempfile +import os import itertools import logging -import os -import tempfile -from multiprocessing import cpu_count -from multiprocessing.dummy import Pool -from time import time import numpy as np + +from time import time +from multiprocessing.dummy import Pool +from 
multiprocessing import cpu_count from scipy.sparse import csr_matrix from sklearn import neighbors -from sklearn.externals.joblib import dump, load +from sklearn.externals.joblib import Parallel, delayed, load, dump +import sys -from .codebook import Codebook from .decorators import timeit +from .codebook import Codebook from .neighborhood import NeighborhoodFactory from .normalization import NormalizerFactory @@ -137,8 +139,7 @@ def __init__(self, mapsize = self.calculate_map_size(lattice) if not mapsize else mapsize self.codebook = Codebook(mapsize, lattice) self.training = training - self._component_names = (self.build_component_names() - if component_names is None else [component_names]) + self._component_names = self.build_component_names() if component_names is None else [component_names] self._distance_matrix = self.calculate_map_dist() @property @@ -154,7 +155,7 @@ def component_names(self, compnames): 'size as the data dimension/features') def build_component_names(self): - cc = ['Variable-' + str(i + 1) for i in range(0, self._dim)] + cc = ['Variable-' + str(i+1) for i in range(0, self._dim)] return np.asarray(cc)[np.newaxis, :] @property @@ -213,8 +214,7 @@ def train(self, :param n_job: number of jobs to use to parallelize the traning :param shared_memory: flag to active shared memory :param verbose: verbosity, could be 'debug', 'info' or None - :param train_len_factor: Factor that multiply default training lenghts - (similar to "training" parameter in the matlab version). (lbugnon) + :param train_len_factor: Factor that multiply default training lenghts (similar to "training" parameter in the matlab version). (lbugnon) """ logging.root.setLevel( getattr(logging, verbose.upper()) if verbose else logging.ERROR) @@ -243,11 +243,9 @@ def train(self, self.codebook.pca_linear_initialization(self._data) self.rough_train(njob=n_job, shared_memory=shared_memory, trainlen=train_rough_len, - radiusin=train_rough_radiusin, radiusfin=train_rough_radiusfin, - trainlen_factor=train_len_factor, maxtrainlen=maxtrainlen) + radiusin=train_rough_radiusin, radiusfin=train_rough_radiusfin,trainlen_factor=train_len_factor,maxtrainlen=maxtrainlen) self.finetune_train(njob=n_job, shared_memory=shared_memory, trainlen=train_finetune_len, - radiusin=train_finetune_radiusin, radiusfin=train_finetune_radiusfin, - trainlen_factor=train_len_factor, maxtrainlen=maxtrainlen) + radiusin=train_finetune_radiusin, radiusfin=train_finetune_radiusfin,trainlen_factor=train_len_factor,maxtrainlen=maxtrainlen) logging.debug( " --------------------------------------------------------------") logging.info(" Final quantization error: %f" % np.mean(self._bmu[1])) @@ -257,56 +255,55 @@ def _calculate_ms_and_mpd(self): max_s = max(self.codebook.mapsize[0], self.codebook.mapsize[1]) if mn == 1: - mpd = float(self.codebook.nnodes * 10) / float(self._dlen) + mpd = float(self.codebook.nnodes*10)/float(self._dlen) else: - mpd = float(self.codebook.nnodes) / float(self._dlen) - ms = max_s / 2.0 if mn == 1 else max_s + mpd = float(self.codebook.nnodes)/float(self._dlen) + ms = max_s/2.0 if mn == 1 else max_s return ms, mpd - def rough_train(self, njob=1, shared_memory=False, trainlen=None, radiusin=None, - radiusfin=None, trainlen_factor=1, maxtrainlen=np.Inf): + def rough_train(self, njob=1, shared_memory=False, trainlen=None, radiusin=None, radiusfin=None,trainlen_factor=1,maxtrainlen=np.Inf): logging.info(" Rough training...") ms, mpd = self._calculate_ms_and_mpd() #lbugnon: add maxtrainlen - trainlen = min(int(np.ceil(30 * 
mpd)), maxtrainlen) if not trainlen else trainlen + trainlen = min(int(np.ceil(30*mpd)),maxtrainlen) if not trainlen else trainlen #print("maxtrainlen %d",maxtrainlen) #lbugnon: add trainlen_factor - trainlen = int(trainlen * trainlen_factor) - + trainlen=int(trainlen*trainlen_factor) + if self.initialization == 'random': - radiusin = max(1, np.ceil(ms / 3.)) if not radiusin else radiusin - radiusfin = max(1, radiusin / 6.) if not radiusfin else radiusfin + radiusin = max(1, np.ceil(ms/3.)) if not radiusin else radiusin + radiusfin = max(1, radiusin/6.) if not radiusfin else radiusfin elif self.initialization == 'pca': - radiusin = max(1, np.ceil(ms / 8.)) if not radiusin else radiusin - radiusfin = max(1, radiusin / 4.) if not radiusfin else radiusfin + radiusin = max(1, np.ceil(ms/8.)) if not radiusin else radiusin + radiusfin = max(1, radiusin/4.) if not radiusfin else radiusfin self._batchtrain(trainlen, radiusin, radiusfin, njob, shared_memory) - def finetune_train(self, njob=1, shared_memory=False, trainlen=None, radiusin=None, - radiusfin=None, trainlen_factor=1, maxtrainlen=np.Inf): + def finetune_train(self, njob=1, shared_memory=False, trainlen=None, radiusin=None, radiusfin=None,trainlen_factor=1,maxtrainlen=np.Inf): logging.info(" Finetune training...") ms, mpd = self._calculate_ms_and_mpd() #lbugnon: add maxtrainlen if self.initialization == 'random': - trainlen = min(int(np.ceil(50 * mpd)), maxtrainlen) if not trainlen else trainlen - radiusin = max(1, ms / 12.) if not radiusin else radiusin - radiusfin = max(1, radiusin / 25.) if not radiusfin else radiusfin + trainlen = min(int(np.ceil(50*mpd)),maxtrainlen) if not trainlen else trainlen + radiusin = max(1, ms/12.) if not radiusin else radiusin # from radius fin in rough training + radiusfin = max(1, radiusin/25.) if not radiusfin else radiusfin elif self.initialization == 'pca': - trainlen = min(int(np.ceil(40 * mpd)), maxtrainlen) if not trainlen else trainlen - radiusin = max(1, np.ceil(ms / 8.) / 4) if not radiusin else radiusin + trainlen = min(int(np.ceil(40*mpd)),maxtrainlen) if not trainlen else trainlen + radiusin = max(1, np.ceil(ms/8.)/4) if not radiusin else radiusin radiusfin = 1 if not radiusfin else radiusfin # max(1, ms/128) #print("maxtrainlen %d",maxtrainlen) - + #lbugnon: add trainlen_factor - trainlen = int(trainlen_factor * trainlen) - + trainlen=int(trainlen_factor*trainlen) + + self._batchtrain(trainlen, radiusin, radiusfin, njob, shared_memory) def _batchtrain(self, trainlen, radiusin, radiusfin, njob=1, @@ -342,19 +339,20 @@ def _batchtrain(self, trainlen, radiusin, radiusfin, njob=1, self.codebook.matrix = self.update_codebook_voronoi(data, bmu, neighborhood) - #lbugnon: ojo! aca el bmy[1] a veces da negativo, y despues de eso se rompe... - # hay algo raro ahi + #lbugnon: ojo! 
aca el bmy[1] a veces da negativo, y despues de eso se rompe...hay algo raro ahi qerror = (i + 1, round(time() - t1, 3), - np.mean(np.sqrt(bmu[1] + fixed_euclidean_x2))) - + np.mean(np.sqrt(bmu[1] + fixed_euclidean_x2))) #lbugnon: ojo aca me tirĂ³ un warning, revisar (commit sinc: 965666d3d4d93bcf48e8cef6ea2c41a018c1cb83 ) + #lbugnon + #ipdb.set_trace() + # logging.info( " epoch: %d ---> elapsed time: %f, quantization error: %f\n" % qerror) if np.any(np.isnan(qerror)): logging.info("nan quantization error, exit train\n") - + #sys.exit("quantization error=nan, exit train") - + bmu[1] = np.sqrt(bmu[1] + fixed_euclidean_x2) self._bmu = bmu @@ -382,7 +380,7 @@ def row_chunk(part): return part * dlen // njb def col_chunk(part): - return min((part + 1) * dlen // njb, dlen) + return min((part+1)*dlen // njb, dlen) chunks = [input_matrix[row_chunk(i):col_chunk(i)] for i in range(njb)] b = pool.map(lambda chk: chunk_bmu_finder(chk, self.codebook.matrix, y2, nth=nth), chunks) @@ -467,7 +465,7 @@ def predict_by(self, data, target, k=5, wt='distance'): data = self._normalizer.normalize_by(self.data_raw, data) data = data[:, indX] - elif dimdata == dim - 1: + elif dimdata == dim-1: data = self._normalizer.normalize_by(self.data_raw[:, indX], data) predicted_values = clf.predict(data) @@ -487,7 +485,7 @@ def predict(self, x_test, k=5, wt='distance'): (more detail in KNeighborsRegressor docs) :returns: predicted values for the input data """ - target = self.data_raw.shape[1] - 1 + target = self.data_raw.shape[1]-1 x_train = self.codebook.matrix[:, :target] y_train = self.codebook.matrix[:, target] clf = neighbors.KNeighborsRegressor(k, weights=wt) @@ -530,7 +528,7 @@ def bmu_ind_to_xy(self, bmu_ind): # bmu should be an integer between 0 to no_nodes out = np.zeros((bmu_ind.shape[0], 3)) out[:, 2] = bmu_ind - out[:, 0] = rows - 1 - bmu_ind / cols + out[:, 0] = rows-1-bmu_ind / cols out[:, 0] = bmu_ind / cols out[:, 1] = bmu_ind % cols @@ -573,14 +571,14 @@ def predict_probability(self, data, target, k=5): data = self._normalizer.normalize_by(self.data_raw, data) data = data[:, indx] - elif dimdata == dim - 1: + elif dimdata == dim-1: data = self._normalizer.normalize_by(self.data_raw[:, indx], data) weights, ind = clf.kneighbors(data, n_neighbors=k, return_distance=True) - weights = 1. / weights + weights = 1./weights sum_ = np.sum(weights, axis=1) - weights = weights / sum_[:, np.newaxis] + weights = weights/sum_[:, np.newaxis] labels = np.sign(self.codebook.matrix[ind, target]) labels[labels >= 0] = 1 @@ -614,7 +612,7 @@ def node_activation(self, data, target=None, wt='distance'): weights, ind = clf.kneighbors(data) # Softmax function - weights = 1. 
/ weights + weights = 1./weights return weights, ind @@ -622,16 +620,13 @@ def calculate_topographic_error(self): bmus1 = self.find_bmu(self.data_raw, njb=1, nth=1) bmus2 = self.find_bmu(self.data_raw, njb=1, nth=2) topographic_error = None - if self.codebook.lattice == "rect": - bmus_gap = np.abs((self.bmu_ind_to_xy(np.array(bmus1[0]))[:, 0:2] - - self.bmu_ind_to_xy(np.array(bmus2[0]))[:, 0:2]).sum(axis=1)) + if self.codebook.lattice=="rect": + bmus_gap = np.abs((self.bmu_ind_to_xy(np.array(bmus1[0]))[:, 0:2] - self.bmu_ind_to_xy(np.array(bmus2[0]))[:, 0:2]).sum(axis=1)) topographic_error = np.mean(bmus_gap != 1) - elif self.codebook.lattice == "hexa": - dist_matrix_1 = (self.codebook.lattice_distances[bmus1[0].astype(int)] - .reshape(len(bmus1[0]), -1)) - topographic_error = (np.array([distances[bmu2] - for bmu2, distances - in zip(bmus2[0].astype(int), dist_matrix_1)]) > 2).mean() + elif self.codebook.lattice=="hexa": + dist_matrix_1 = self.codebook.lattice_distances[bmus1[0].astype(int)].reshape(len(bmus1[0]), -1) + topographic_error = (np.array( + [distances[bmu2] for bmu2, distances in zip(bmus2[0].astype(int), dist_matrix_1)]) > 2).mean() return(topographic_error) def calculate_quantization_error(self): @@ -640,9 +635,8 @@ def calculate_quantization_error(self): return quantization_error def calculate_map_size(self, lattice): - """Calculates the optimal map size given a dataset using eigenvalues and eigenvectors. - Matlab ported. - + """ + Calculates the optimal map size given a dataset using eigenvalues and eigenvectors. Matlab ported :lattice: 'rect' or 'hex' :return: map sizes """ @@ -672,7 +666,7 @@ def calculate_map_size(self, lattice): if lattice == "rect": size1 = min(munits, round(np.sqrt(munits / ratio))) else: - size1 = min(munits, round(np.sqrt(munits / ratio * np.sqrt(0.75)))) + size1 = min(munits, round(np.sqrt(munits / ratio*np.sqrt(0.75)))) size2 = round(munits / size1) @@ -705,16 +699,19 @@ def _chunk_based_bmu_find(input_matrix, codebook, y2, nth=1): blen = min(50, dlen) i0 = 0 - while i0 + 1 <= dlen: + while i0+1 <= dlen: low = i0 - high = min(dlen, i0 + blen) - i0 = i0 + blen - ddata = input_matrix[low:high + 1] + high = min(dlen, i0+blen) + i0 = i0+blen + ddata = input_matrix[low:high+1] d = np.dot(codebook, ddata.T) d *= -2 d += y2.reshape(nnodes, 1) - bmu[low:high + 1, 0] = np.argpartition(d, nth, axis=0)[nth - 1] - bmu[low:high + 1, 1] = np.partition(d, nth, axis=0)[nth - 1] + bmu[low:high+1, 0] = np.argpartition(d, nth, axis=0)[nth-1] + bmu[low:high+1, 1] = np.partition(d, nth, axis=0)[nth-1] del ddata return bmu + + + diff --git a/sompy/visualization/bmuhits.py b/sompy/visualization/bmuhits.py index cc7b870..d382fc6 100644 --- a/sompy/visualization/bmuhits.py +++ b/sompy/visualization/bmuhits.py @@ -5,7 +5,9 @@ from matplotlib import pyplot as plt from .mapview import MapView -from .plot_tools import plot_hex_map + +from sompy.visualization.plot_tools import plot_hex_map + class BmuHitsView(MapView): @@ -17,18 +19,18 @@ def _set_labels(self, cents, ax, labels, onlyzeros, fontsize, hex=False): c = cents[i] if hex else (cents[i, 1] + 0.5, cents[-(i + 1), 0] + 0.5) ax.annotate(txt, c, va="center", ha="center", size=fontsize) - def show(self, som, anotate=True, onlyzeros=False, labelsize=7, cmap="jet", logaritmic=False): + def show(self, som, anotate=True, onlyzeros=False, labelsize=7, cmap="jet", logaritmic = False): org_w = self.width org_h = self.height (self.width, self.height, indtoshow, no_row_in_plot, no_col_in_plot, axis_num) = 
self._calculate_figure_params(som, 1, 1) - self.width /= (self.width / org_w) if self.width > self.height else (self.height / org_h) - self.height /= (self.width / org_w) if self.width > self.height else (self.height / org_h) + self.width /= (self.width/org_w) if self.width > self.height else (self.height/org_h) + self.height /= (self.width / org_w) if self.width > self.height else (self.height / org_h) counts = Counter(som._bmu[0]) - counts = [counts.get(x, 0) - for x in range(som.codebook.mapsize[0] * som.codebook.mapsize[1])] - mp = np.array(counts).reshape(som.codebook.mapsize[0], som.codebook.mapsize[1]) + counts = [counts.get(x, 0) for x in range(som.codebook.mapsize[0] * som.codebook.mapsize[1])] + mp = np.array(counts).reshape(som.codebook.mapsize[0], + som.codebook.mapsize[1]) if not logaritmic: norm = matplotlib.colors.Normalize( diff --git a/sompy/visualization/dotmap.py b/sompy/visualization/dotmap.py index 8a0697a..7ba10ff 100644 --- a/sompy/visualization/dotmap.py +++ b/sompy/visualization/dotmap.py @@ -1,23 +1,22 @@ -import numpy as np -from matplotlib import pyplot as plt - from .view import MatplotView +from matplotlib import pyplot as plt +import numpy as np class DotMapView(MatplotView): def init_figure(self, dim, cols): - no_row_in_plot = dim / cols + 1 + no_row_in_plot = dim/cols + 1 no_col_in_plot = dim if no_row_in_plot <= 1 else cols h = .1 w = .1 - self.width = no_col_in_plot * 2.5 * (1 + w) - self.height = no_row_in_plot * 2.5 * (1 + h) + self.width = no_col_in_plot*2.5*(1+w) + self.height = no_row_in_plot*2.5*(1+h) self.prepare() def plot(self, data, coords, msz0, msz1, colormap, dlen, dim, rows, cols): for i in range(dim): - plt.subplot(rows, cols, i + 1) + plt.subplot(rows, cols, i+1) # This uses the colors uniquely for each record, while in normal # views, it is based on the values within each dimensions. 
This is @@ -29,7 +28,7 @@ def plot(self, data, coords, msz0, msz1, colormap, dlen, dim, rows, cols): for j in range(dlen): plt.scatter(coords[j, 1], - msz0 - 1 - coords[j, 0], + msz0-1-coords[j, 0], c=data[j, i], vmax=mx[j], vmin=mn[j], s=90, @@ -39,8 +38,8 @@ def plot(self, data, coords, msz0, msz1, colormap, dlen, dim, rows, cols): alpha=1) eps = .0075 - plt.xlim(0 - eps, msz1 - 1 + eps) - plt.ylim(0 - eps, msz0 - 1 + eps) + plt.xlim(0-eps, msz1-1+eps) + plt.ylim(0-eps, msz0-1+eps) plt.xticks([]) plt.yticks([]) @@ -51,7 +50,7 @@ def show(self, som, which_dim='all', colormap=None, cols=None): msz0, msz1 = som.codebook.mapsize coords = som.bmu_ind_to_xy(som.project_data(data))[:, :2] cols = cols if cols else 8 # 8 is arbitrary - rows = data.shape[1] / cols + 1 + rows = data.shape[1]/cols+1 if which_dim == 'all': dim = data.shape[0] diff --git a/sompy/visualization/histogram.py b/sompy/visualization/histogram.py index d89e8f0..cc71996 100644 --- a/sompy/visualization/histogram.py +++ b/sompy/visualization/histogram.py @@ -1,15 +1,15 @@ -import numpy as np -from matplotlib import cm, pyplot as plt -from matplotlib.colors import LogNorm - from .view import MatplotView +from matplotlib import pyplot as plt +from matplotlib import cm +from matplotlib.colors import LogNorm +import numpy as np class Hist2d(MatplotView): def _fill_hist(self, x, y, mapsize, data_coords, what='train'): - x = np.arange(.5, mapsize[1] + .5, 1) - y = np.arange(.5, mapsize[0] + .5, 1) + x = np.arange(.5, mapsize[1]+.5, 1) + y = np.arange(.5, mapsize[0]+.5, 1) X, Y = np.meshgrid(x, y) if what == 'train': @@ -23,12 +23,12 @@ def _fill_hist(self, x, y, mapsize, data_coords, what='train'): else: a = plt.hist2d(x, y, bins=(mapsize[1], mapsize[0]), alpha=.0, cmap=cm.jet, norm=LogNorm()) - area = a[0].T * 50 + area = a[0].T*50 plt.scatter(data_coords[:, 1] + .5, mapsize[0] - .5 - data_coords[:, 0], s=area, alpha=0.9, c='None', marker='o', cmap='jet', linewidths=3, edgecolor='r') - plt.scatter(data_coords[:, 1] + .5, mapsize[0] - .5 - data_coords[:, 0], + plt.scatter(data_coords[:, 1]+.5, mapsize[0]-.5-data_coords[:, 0], s=area, alpha=0.2, c='b', marker='o', cmap='jet', linewidths=3, edgecolor='r') diff --git a/sompy/visualization/hitmap.py b/sompy/visualization/hitmap.py index 9d86284..a737440 100644 --- a/sompy/visualization/hitmap.py +++ b/sompy/visualization/hitmap.py @@ -1,8 +1,10 @@ -import numpy as np +from .view import MatplotView +from sompy.visualization.plot_tools import plot_hex_map from matplotlib import pyplot as plt +import numpy as np from .mapview import MapView -from .plot_tools import plot_hex_map + class HitMapView(MapView): @@ -12,7 +14,7 @@ def _set_labels(self, cents, ax, labels, onlyzeros, fontsize, hex=False): if onlyzeros == True: if txt > 0: txt = "" - c = cents[i] if hex else (cents[i, 1] + 0.5, cents[-(i + 1), 0] + 0.5) + c = cents[i] if hex else (cents[i, 1], cents[-(i + 1), 0]) ax.annotate(txt, c, va="center", ha="center", size=fontsize) def show(self, som, data=None, anotate=True, onlyzeros=False, labelsize=7, cmap="jet"): @@ -42,15 +44,14 @@ def show(self, som, data=None, anotate=True, onlyzeros=False, labelsize=7, cmap= self._set_labels(cents, ax, clusters[proj], onlyzeros, labelsize, hex=False) else: - cents = som.bmu_ind_to_xy(np.arange(0, msz[0] * msz[1])) + cents = som.bmu_ind_to_xy(np.arange(0, msz[0]*msz[1])) if anotate: # TODO: Fix position of the labels self._set_labels(cents, ax, clusters, onlyzeros, labelsize, hex=False) - plt.imshow(clusters.reshape(msz[0], msz[1])[::], alpha=.5) + 
plt.imshow(np.flip(clusters.reshape(msz[0], msz[1])[::],axis=0), alpha=0.5) elif som.codebook.lattice == "hexa": - ax, cents = plot_hex_map(np.flip(clusters.reshape(msz[0], msz[1])[::], axis=1), - fig=self._fig, colormap=cmap, colorbar=False) + ax, cents = plot_hex_map(np.flip(clusters.reshape(msz[0], msz[1])[::], axis=0), fig=self._fig, colormap=cmap, colorbar=False) if anotate: - self._set_labels(cents, ax, clusters, onlyzeros, labelsize, hex=True) + self._set_labels(cents, ax, reversed(clusters), onlyzeros, labelsize, hex=True) \ No newline at end of file diff --git a/sompy/visualization/mapview.py b/sompy/visualization/mapview.py index bca09d8..1dd5485 100644 --- a/sompy/visualization/mapview.py +++ b/sompy/visualization/mapview.py @@ -1,9 +1,10 @@ +from matplotlib import colors import matplotlib -import numpy as np -from matplotlib import pyplot as plt -from .plot_tools import plot_hex_map +from sompy.visualization.plot_tools import plot_hex_map from .view import MatplotView +from matplotlib import pyplot as plt +import numpy as np class MapView(MatplotView): @@ -25,7 +26,7 @@ def _calculate_figure_params(self, som, which_dim, col_sz): ratio_hitmap = msz_row / float(msz_col) ratio_fig = row_sz / float(col_sz) indtoshow = np.arange(0, dim).T - sH, sV = 16, 16 * ratio_fig * ratio_hitmap + sH, sV = 16, 16*ratio_fig*ratio_hitmap elif type(which_dim) == int: dim = 1 @@ -43,7 +44,7 @@ def _calculate_figure_params(self, som, which_dim, col_sz): ratio_hitmap = msz_row / float(msz_col) ratio_fig = row_sz / float(col_sz) indtoshow = np.asarray(which_dim).T - sH, sV = 16, 16 * ratio_fig * ratio_hitmap + sH, sV = 16, 16*ratio_fig*ratio_hitmap no_row_in_plot = dim / col_sz + 1 # 6 is arbitrarily selected if no_row_in_plot <= 1: @@ -80,36 +81,32 @@ def show(self, som, what='codebook', which_dim='all', cmap=None, elif type(which_dim) == list: names = som._component_names[0][which_dim] - if som.codebook.lattice == "rect": + if som.codebook.lattice=="rect": while axis_num < len(indtoshow): axis_num += 1 ax = plt.subplot(no_row_in_plot, no_col_in_plot, axis_num) - ind = int(indtoshow[axis_num - 1]) - - min_color_scale = (np.mean(codebook[:, ind].flatten()) - - 1 * np.std(codebook[:, ind].flatten())) - max_color_scale = (np.mean(codebook[:, ind].flatten()) - + 1 * np.std(codebook[:, ind].flatten())) - min_color_scale = (min_color_scale - if min_color_scale >= min(codebook[:, ind].flatten()) - else min(codebook[:, ind].flatten())) - max_color_scale = (max_color_scale - if max_color_scale <= max(codebook[:, ind].flatten()) - else max(codebook[:, ind].flatten())) - norm = matplotlib.colors.Normalize(vmin=min_color_scale, vmax=max_color_scale, - clip=True) - - mp = codebook[:, ind].reshape(som.codebook.mapsize[0], som.codebook.mapsize[1]) + ind = int(indtoshow[axis_num-1]) + + min_color_scale = np.mean(codebook[:, ind].flatten()) - 1 * np.std(codebook[:, ind].flatten()) + max_color_scale = np.mean(codebook[:, ind].flatten()) + 1 * np.std(codebook[:, ind].flatten()) + min_color_scale = min_color_scale if min_color_scale >= min(codebook[:, ind].flatten()) else \ + min(codebook[:, ind].flatten()) + max_color_scale = max_color_scale if max_color_scale <= max(codebook[:, ind].flatten()) else \ + max(codebook[:, ind].flatten()) + norm = matplotlib.colors.Normalize(vmin=min_color_scale, vmax=max_color_scale, clip=True) + + mp = codebook[:, ind].reshape(som.codebook.mapsize[0], + som.codebook.mapsize[1]) pl = plt.pcolor(mp[::-1], norm=norm) plt.axis([0, som.codebook.mapsize[1], 0, som.codebook.mapsize[0]]) 
plt.title(names[axis_num - 1]) ax.set_yticklabels([]) ax.set_xticklabels([]) plt.colorbar(pl) - elif som.codebook.lattice == "hexa": - plot_hex_map(codebook.reshape(som.codebook.mapsize + [som.codebook.matrix.shape[-1]]), - titles=names, shape=[no_row_in_plot, no_col_in_plot], colormap=cmap, - fig=self._fig) + elif som.codebook.lattice=="hexa": + plot_hex_map(codebook.reshape(som.codebook.mapsize + [som.codebook.matrix.shape[-1]]), titles=names, + shape=[no_row_in_plot, no_col_in_plot], colormap=cmap, fig=self._fig) + class View2DPacked(MapView): @@ -133,8 +130,8 @@ def show(self, som, what='codebook', which_dim='all', cmap=None, if what == 'codebook': h = .1 w = .1 - self.width = no_col_in_plot * 2.5 * (1 + w) - self.height = no_row_in_plot * 2.5 * (1 + h) + self.width = no_col_in_plot*2.5*(1+w) + self.height = no_row_in_plot*2.5*(1+h) self.prepare() while axis_num < len(indtoshow): @@ -142,7 +139,7 @@ def show(self, som, what='codebook', which_dim='all', cmap=None, ax = self._fig.add_subplot(no_row_in_plot, no_col_in_plot, axis_num) ax.axis('off') - ind = int(indtoshow[axis_num - 1]) + ind = int(indtoshow[axis_num-1]) mp = codebook[:, ind].reshape(msz0, msz1) plt.imshow(mp[::-1], norm=None, cmap=cmap) self._set_axis(ax, msz0, msz1) @@ -159,8 +156,8 @@ def show(self, som, what='codebook', which_dim='all', cmap=None, h = .2 w = .001 - self.width = msz0 / 2 - self.height = msz1 / 2 + self.width = msz0/2 + self.height = msz1/2 self.prepare() ax = self._fig.add_subplot(1, 1, 1) @@ -172,8 +169,8 @@ def show(self, som, what='codebook', which_dim='all', cmap=None, plt.subplots_adjust(hspace=h, wspace=w) plt.show() - - + + class View1D(MapView): def show(self, som, what='codebook', which_dim='all', cmap=None, @@ -187,8 +184,10 @@ def show(self, som, what='codebook', which_dim='all', cmap=None, while axis_num < len(indtoshow): axis_num += 1 plt.subplot(no_row_in_plot, no_col_in_plot, axis_num) - ind = int(indtoshow[axis_num - 1]) + ind = int(indtoshow[axis_num-1]) mp = codebook[:, ind] plt.plot(mp, '-k', linewidth=0.8) #plt.show() + + diff --git a/sompy/visualization/plot_tools.py b/sompy/visualization/plot_tools.py index 8a96c38..b610a7d 100644 --- a/sompy/visualization/plot_tools.py +++ b/sompy/visualization/plot_tools.py @@ -6,8 +6,8 @@ from mpl_toolkits.axes_grid1 import make_axes_locatable -def plot_hex_map(d_matrix, titles=[], colormap=cm.gray, shape=[1, 1], comp_width=5, - hex_shrink=1.0, fig=None, colorbar=True): +def plot_hex_map(d_matrix, titles=[], colormap=cm.gray, shape=[1, 1], comp_width=5, hex_shrink=1.0, fig=None, + colorbar=True): """ Plot hexagon map where each neuron is represented by a hexagon. 
The hexagon color is given by the distance between the neurons (D-Matrix) @@ -25,8 +25,7 @@ def plot_hex_map(d_matrix, titles=[], colormap=cm.gray, shape=[1, 1], comp_width def create_grid_coordinates(x, y): coordinates = [x for row in -1 * np.array(list(range(x))) for x in - list(zip(np.arange(((row) % 2) * 0.5, y + ((row) % 2) * 0.5), - [0.8660254 * (row)] * y))] + list(zip(np.arange(((row) % 2) * 0.5, y + ((row) % 2) * 0.5), [0.8660254 * (row)] * y))] return (np.array(list(reversed(coordinates))), x, y) if d_matrix.ndim < 3: @@ -80,4 +79,4 @@ def create_grid_coordinates(x, y): #cbar.ax.tick_params(labelsize=3 * comp_width) - return ax, list(reversed(n_centers)) + return ax, list(reversed(n_centers)) \ No newline at end of file diff --git a/sompy/visualization/plot_tools2.py b/sompy/visualization/plot_tools2.py new file mode 100644 index 0000000..c49ddec --- /dev/null +++ b/sompy/visualization/plot_tools2.py @@ -0,0 +1,80 @@ +import math + +import numpy as np +from matplotlib import cm, pyplot as plt +from matplotlib.collections import RegularPolyCollection +from mpl_toolkits.axes_grid1 import make_axes_locatable + + +def plot_rect_map(d_matrix, titles=[], colormap=cm.gray, shape=[1, 1], comp_width=5, hex_shrink=1.0, fig=None, + colorbar=True): + """ + Plot hexagon map where each neuron is represented by a hexagon. The hexagon + color is given by the distance between the neurons (D-Matrix) + Args: + - grid: Grid dictionary (keys: centers, x, y ), + - d_matrix: array contaning the distances between each neuron + - w: width of the map in inches + - title: map title + Returns the Matplotlib SubAxis instance + """ + + d_matrix = np.flip(d_matrix, axis=0) + + def create_grid_coordinates(x, y): + coordinates = [x for row in -1 * np.array(list(range(x))) for x in + list(zip(np.arange(((row) % 2) * 0, y + ((row) % 2) * 0), [0.8660254 * (row)] * y))] + return (np.array(list(reversed(coordinates))), x, y) + + if d_matrix.ndim < 3: + d_matrix = np.expand_dims(d_matrix, 2) + + if len(titles) != d_matrix.shape[2]: + titles = [""] * d_matrix.shape[2] + + n_centers, x, y = create_grid_coordinates(*d_matrix.shape[:2]) + + # Size of figure in inches + if fig is None: + xinch, yinch = comp_width * shape[1], comp_width * (x / y) * shape[0] + fig = plt.figure(figsize=(xinch, yinch), dpi=72.) 
+ + for comp, title in zip(range(d_matrix.shape[2]), titles): + ax = fig.add_subplot(shape[0], shape[1], comp + 1, aspect='equal') + + # Get pixel size between two data points + xpoints = n_centers[:, 0] + ypoints = n_centers[:, 1] + ax.scatter(xpoints, ypoints, s=0.0, marker='s') + ax.axis([min(xpoints) - 1., max(xpoints) + 1., + min(ypoints) - 1., max(ypoints) + 1.]) + xy_pixels = ax.transData.transform(np.vstack([xpoints, ypoints]).T) + xpix, ypix = xy_pixels.T + + # discover radius and hexagon + apothem = hex_shrink * (xpix[1] - xpix[0]) / math.sqrt(3) + area_inner_circle = math.pi * (apothem ** 2) + dm = d_matrix[:, :, comp].reshape(np.multiply(*d_matrix.shape[:2])) + collection_bg = RegularPolyCollection( + numsides=4, # a square + rotation=np.pi/4, + sizes=(area_inner_circle,), + array=dm, + cmap=colormap, + offsets=n_centers, + transOffset=ax.transData, + ) + ax.add_collection(collection_bg, autolim=True) + + ax.axis('off') + ax.autoscale_view() + ax.set_title(title)#, fontdict={"fontsize": 3 * comp_width}) + divider = make_axes_locatable(ax) + cax = divider.append_axes("right", size="5%", pad=0.05) + cbar = plt.colorbar(collection_bg, cax=cax) + if not colorbar: + cbar.remove() + + #cbar.ax.tick_params(labelsize=3 * comp_width) + + return ax, list(reversed(n_centers)) \ No newline at end of file diff --git a/sompy/visualization/umatrix.py b/sompy/visualization/umatrix.py index a9a25b0..6665401 100644 --- a/sompy/visualization/umatrix.py +++ b/sompy/visualization/umatrix.py @@ -1,11 +1,9 @@ +from .view import MatplotView +from matplotlib import pyplot as plt +from pylab import imshow, contour from math import sqrt - import numpy as np -import pylab import scipy -from matplotlib import pyplot as plt - -from .view import MatplotView class UMatrixView(MatplotView): @@ -30,7 +28,7 @@ def build_u_matrix(self, som, distance=1, row_normalized=False): return Umatrix.reshape(som.codebook.mapsize) def show(self, som, distance2=1, row_normalized=False, show_data=True, - contour=True, blob=False, labels=False): + contooor=True, blob=False, labels=False): umat = self.build_u_matrix(som, distance=distance2, row_normalized=row_normalized) msz = som.codebook.mapsize @@ -38,16 +36,16 @@ def show(self, som, distance2=1, row_normalized=False, show_data=True, coord = som.bmu_ind_to_xy(proj) self._fig, ax = plt.subplots(1, 1) - pylab.imshow(umat, cmap=plt.cm.get_cmap('RdYlBu_r'), alpha=1) + imshow(umat, cmap=plt.cm.get_cmap('RdYlBu_r'), alpha=1) - if contour: + if contooor: mn = np.min(umat.flatten()) mx = np.max(umat.flatten()) std = np.std(umat.flatten()) md = np.median(umat.flatten()) - mx = md + 0 * std - pylab.contour(umat, np.linspace(mn, mx, 15), linewidths=0.7, - cmap=plt.cm.get_cmap('Blues')) + mx = md + 0*std + contour(umat, np.linspace(mn, mx, 15), linewidths=0.7, + cmap=plt.cm.get_cmap('Blues')) if show_data: plt.scatter(coord[:, 1], coord[:, 0], s=2, alpha=1., c='Gray', @@ -62,11 +60,11 @@ def show(self, som, distance2=1, row_normalized=False, show_data=True, horizontalalignment='center', verticalalignment='center') - ratio = float(msz[0]) / (msz[0] + msz[1]) - self._fig.set_size_inches((1 - ratio) * 15, ratio * 15) + ratio = float(msz[0])/(msz[0]+msz[1]) + self._fig.set_size_inches((1-ratio)*15, ratio*15) plt.tight_layout() plt.subplots_adjust(hspace=.00, wspace=.000) - sel_points = [] + sel_points = list() if blob: from skimage.color import rgb2gray @@ -78,8 +76,8 @@ def show(self, som, distance2=1, row_normalized=False, show_data=True, # 'Laplacian of Gaussian' blobs = blob_log(image, 
max_sigma=5, num_sigma=4, threshold=.152) blobs[:, 2] = blobs[:, 2] * sqrt(2) - pylab.imshow(umat, cmap=plt.cm.get_cmap('RdYlBu_r'), alpha=1) - sel_points = [] + imshow(umat, cmap=plt.cm.get_cmap('RdYlBu_r'), alpha=1) + sel_points = list() for blob in blobs: row, col, r = blob
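
The largest addition in this patch is sompy/visualization/plot_tools2.py: plot_rect_map mirrors the existing plot_hex_map helper but draws each neuron as a square cell (a RegularPolyCollection with numsides=4, rotated by pi/4) and flips the input matrix along axis 0 before plotting. A minimal usage sketch follows; it is not part of the patch, and the map size, random input matrix and keyword values are illustrative assumptions only:

    # Hypothetical example (not from the patch): render a 2-D matrix, e.g. a
    # component plane or BMU hit counts reshaped to the codebook mapsize,
    # using the new square-cell plotting helper added by this patch.
    import numpy as np
    from matplotlib import cm, pyplot as plt
    from sompy.visualization.plot_tools2 import plot_rect_map

    rows, cols = 10, 15                        # assumed SOM map size
    d_matrix = np.random.rand(rows, cols)      # stand-in for real codebook values
    ax, centers = plot_rect_map(d_matrix, titles=["demo plane"],
                                colormap=cm.jet, comp_width=5, colorbar=True)
    plt.show()

Related design note: umatrix.py now imports imshow and contour directly (from pylab import imshow, contour), so UMatrixView.show renames its contour flag to contooor to avoid shadowing the imported function; callers that passed contour=... by keyword would need to switch to the new name.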