diff --git a/sompy/sompy.py b/sompy/sompy.py
index 6f4bf1e..a350011 100644
--- a/sompy/sompy.py
+++ b/sompy/sompy.py
@@ -137,7 +137,8 @@ def __init__(self,
         mapsize = self.calculate_map_size(lattice) if not mapsize else mapsize
         self.codebook = Codebook(mapsize, lattice)
         self.training = training
-        self._component_names = self.build_component_names() if component_names is None else [component_names]
+        self._component_names = (self.build_component_names()
+                                 if component_names is None else [component_names])
         self._distance_matrix = self.calculate_map_dist()
 
     @property
@@ -212,7 +213,8 @@ def train(self,
         :param n_job: number of jobs to use to parallelize the traning
         :param shared_memory: flag to active shared memory
         :param verbose: verbosity, could be 'debug', 'info' or None
-        :param train_len_factor: Factor that multiply default training lenghts (similar to "training" parameter in the matlab version). (lbugnon)
+        :param train_len_factor: factor that multiplies the default training lengths
+            (similar to the "training" parameter in the Matlab version). (lbugnon)
         """
         logging.root.setLevel(
             getattr(logging, verbose.upper()) if verbose else logging.ERROR)
@@ -241,9 +243,11 @@ def train(self,
             self.codebook.pca_linear_initialization(self._data)
 
         self.rough_train(njob=n_job, shared_memory=shared_memory, trainlen=train_rough_len,
-                         radiusin=train_rough_radiusin, radiusfin=train_rough_radiusfin, trainlen_factor=train_len_factor, maxtrainlen=maxtrainlen)
+                         radiusin=train_rough_radiusin, radiusfin=train_rough_radiusfin,
+                         trainlen_factor=train_len_factor, maxtrainlen=maxtrainlen)
         self.finetune_train(njob=n_job, shared_memory=shared_memory, trainlen=train_finetune_len,
-                            radiusin=train_finetune_radiusin, radiusfin=train_finetune_radiusfin, trainlen_factor=train_len_factor, maxtrainlen=maxtrainlen)
+                            radiusin=train_finetune_radiusin, radiusfin=train_finetune_radiusfin,
+                            trainlen_factor=train_len_factor, maxtrainlen=maxtrainlen)
         logging.debug(
             " --------------------------------------------------------------")
         logging.info(" Final quantization error: %f" % np.mean(self._bmu[1]))
@@ -260,7 +264,8 @@ def _calculate_ms_and_mpd(self):
 
         return ms, mpd
 
-    def rough_train(self, njob=1, shared_memory=False, trainlen=None, radiusin=None, radiusfin=None, trainlen_factor=1, maxtrainlen=np.Inf):
+    def rough_train(self, njob=1, shared_memory=False, trainlen=None, radiusin=None,
+                    radiusfin=None, trainlen_factor=1, maxtrainlen=np.Inf):
         logging.info(" Rough training...")
 
         ms, mpd = self._calculate_ms_and_mpd()
@@ -280,7 +285,8 @@ def rough_train(self, njob=1, shared_memory=False, trainlen=None, radiusin=None,
 
         self._batchtrain(trainlen, radiusin, radiusfin, njob, shared_memory)
 
-    def finetune_train(self, njob=1, shared_memory=False, trainlen=None, radiusin=None, radiusfin=None, trainlen_factor=1, maxtrainlen=np.Inf):
+    def finetune_train(self, njob=1, shared_memory=False, trainlen=None, radiusin=None,
+                       radiusfin=None, trainlen_factor=1, maxtrainlen=np.Inf):
         logging.info(" Finetune training...")
 
         ms, mpd = self._calculate_ms_and_mpd()
@@ -288,7 +294,7 @@ def finetune_train(self, njob=1, shared_memory=False, trainlen=None, radiusin=No
         #lbugnon: add maxtrainlen
         if self.initialization == 'random':
             trainlen = min(int(np.ceil(50 * mpd)), maxtrainlen) if not trainlen else trainlen
-            radiusin = max(1, ms / 12.) if not radiusin else radiusin # from radius fin in rough training
+            radiusin = max(1, ms / 12.) if not radiusin else radiusin
             radiusfin = max(1, radiusin / 25.) if not radiusfin else radiusfin
 
         elif self.initialization == 'pca':
@@ -336,12 +342,11 @@ def _batchtrain(self, trainlen, radiusin, radiusfin, njob=1,
             self.codebook.matrix = self.update_codebook_voronoi(data, bmu,
                                                                 neighborhood)
 
-            #lbugnon: ojo! aca el bmy[1] a veces da negativo, y despues de eso se rompe...hay algo raro ahi
+            # lbugnon: careful! bmu[1] sometimes comes out negative here, and
+            # after that everything breaks... something odd is going on there
             qerror = (i + 1, round(time() - t1, 3),
-                      np.mean(np.sqrt(bmu[1] + fixed_euclidean_x2))) #lbugnon: ojo aca me tiró un warning, revisar (commit sinc: 965666d3d4d93bcf48e8cef6ea2c41a018c1cb83 )
-            #lbugnon
-            #ipdb.set_trace()
-            #
+                      np.mean(np.sqrt(bmu[1] + fixed_euclidean_x2)))
+
             logging.info(
                 " epoch: %d ---> elapsed time: %f, quantization error: %f\n" %
                 qerror)
@@ -618,12 +623,15 @@ def calculate_topographic_error(self):
         bmus2 = self.find_bmu(self.data_raw, njb=1, nth=2)
         topographic_error = None
         if self.codebook.lattice == "rect":
-            bmus_gap = np.abs((self.bmu_ind_to_xy(np.array(bmus1[0]))[:, 0:2] - self.bmu_ind_to_xy(np.array(bmus2[0]))[:, 0:2]).sum(axis=1))
+            bmus_gap = np.abs((self.bmu_ind_to_xy(np.array(bmus1[0]))[:, 0:2]
+                               - self.bmu_ind_to_xy(np.array(bmus2[0]))[:, 0:2]).sum(axis=1))
             topographic_error = np.mean(bmus_gap != 1)
         elif self.codebook.lattice == "hexa":
-            dist_matrix_1 = self.codebook.lattice_distances[bmus1[0].astype(int)].reshape(len(bmus1[0]), -1)
-            topographic_error = (np.array(
-                [distances[bmu2] for bmu2, distances in zip(bmus2[0].astype(int), dist_matrix_1)]) > 2).mean()
+            dist_matrix_1 = (self.codebook.lattice_distances[bmus1[0].astype(int)]
+                             .reshape(len(bmus1[0]), -1))
+            topographic_error = (np.array([distances[bmu2]
+                                           for bmu2, distances
+                                           in zip(bmus2[0].astype(int), dist_matrix_1)]) > 2).mean()
         return(topographic_error)
 
     def calculate_quantization_error(self):
@@ -632,8 +640,9 @@ def calculate_quantization_error(self):
         return quantization_error
 
     def calculate_map_size(self, lattice):
-        """
-        Calculates the optimal map size given a dataset using eigenvalues and eigenvectors. Matlab ported
+        """Calculates the optimal map size given a dataset using eigenvalues and eigenvectors.
+        Ported from the Matlab version.
+
         :lattice: 'rect' or 'hex'
         :return: map sizes
         """
diff --git a/sompy/visualization/bmuhits.py b/sompy/visualization/bmuhits.py
index 3dd3946..cc7b870 100644
--- a/sompy/visualization/bmuhits.py
+++ b/sompy/visualization/bmuhits.py
@@ -26,9 +26,9 @@ def show(self, som, anotate=True, onlyzeros=False, labelsize=7, cmap="jet", loga
         self.height /= (self.width / org_w) if self.width > self.height else (self.height / org_h)
 
         counts = Counter(som._bmu[0])
-        counts = [counts.get(x, 0) for x in range(som.codebook.mapsize[0] * som.codebook.mapsize[1])]
-        mp = np.array(counts).reshape(som.codebook.mapsize[0],
-                                      som.codebook.mapsize[1])
+        counts = [counts.get(x, 0)
+                  for x in range(som.codebook.mapsize[0] * som.codebook.mapsize[1])]
+        mp = np.array(counts).reshape(som.codebook.mapsize[0], som.codebook.mapsize[1])
 
         if not logaritmic:
             norm = matplotlib.colors.Normalize(
diff --git a/sompy/visualization/hitmap.py b/sompy/visualization/hitmap.py
index 52f6675..9d86284 100644
--- a/sompy/visualization/hitmap.py
+++ b/sompy/visualization/hitmap.py
@@ -50,6 +50,7 @@ def show(self, som, data=None, anotate=True, onlyzeros=False, labelsize=7, cmap=
             plt.imshow(clusters.reshape(msz[0], msz[1])[::], alpha=.5)
 
         elif som.codebook.lattice == "hexa":
-            ax, cents = plot_hex_map(np.flip(clusters.reshape(msz[0], msz[1])[::], axis=1), fig=self._fig, colormap=cmap, colorbar=False)
+            ax, cents = plot_hex_map(np.flip(clusters.reshape(msz[0], msz[1])[::], axis=1),
+                                     fig=self._fig, colormap=cmap, colorbar=False)
             if anotate:
                 self._set_labels(cents, ax, clusters, onlyzeros, labelsize, hex=True)
diff --git a/sompy/visualization/mapview.py b/sompy/visualization/mapview.py
index 4290d5b..bca09d8 100644
--- a/sompy/visualization/mapview.py
+++ b/sompy/visualization/mapview.py
@@ -86,16 +86,20 @@ def show(self, som, what='codebook', which_dim='all', cmap=None,
                 ax = plt.subplot(no_row_in_plot, no_col_in_plot, axis_num)
                 ind = int(indtoshow[axis_num - 1])
 
-                min_color_scale = np.mean(codebook[:, ind].flatten()) - 1 * np.std(codebook[:, ind].flatten())
-                max_color_scale = np.mean(codebook[:, ind].flatten()) + 1 * np.std(codebook[:, ind].flatten())
-                min_color_scale = min_color_scale if min_color_scale >= min(codebook[:, ind].flatten()) else \
-                    min(codebook[:, ind].flatten())
-                max_color_scale = max_color_scale if max_color_scale <= max(codebook[:, ind].flatten()) else \
-                    max(codebook[:, ind].flatten())
-                norm = matplotlib.colors.Normalize(vmin=min_color_scale, vmax=max_color_scale, clip=True)
-
-                mp = codebook[:, ind].reshape(som.codebook.mapsize[0],
-                                              som.codebook.mapsize[1])
+                min_color_scale = (np.mean(codebook[:, ind].flatten())
+                                   - 1 * np.std(codebook[:, ind].flatten()))
+                max_color_scale = (np.mean(codebook[:, ind].flatten())
+                                   + 1 * np.std(codebook[:, ind].flatten()))
+                min_color_scale = (min_color_scale
+                                   if min_color_scale >= min(codebook[:, ind].flatten())
+                                   else min(codebook[:, ind].flatten()))
+                max_color_scale = (max_color_scale
+                                   if max_color_scale <= max(codebook[:, ind].flatten())
+                                   else max(codebook[:, ind].flatten()))
+                norm = matplotlib.colors.Normalize(vmin=min_color_scale, vmax=max_color_scale,
+                                                   clip=True)
+
+                mp = codebook[:, ind].reshape(som.codebook.mapsize[0], som.codebook.mapsize[1])
                 pl = plt.pcolor(mp[::-1], norm=norm)
                 plt.axis([0, som.codebook.mapsize[1], 0, som.codebook.mapsize[0]])
                 plt.title(names[axis_num - 1])
@@ -103,8 +107,9 @@ def show(self, som, what='codebook', which_dim='all', cmap=None,
                 ax.set_xticklabels([])
                 plt.colorbar(pl)
         elif som.codebook.lattice == "hexa":
-            plot_hex_map(codebook.reshape(som.codebook.mapsize + [som.codebook.matrix.shape[-1]]), titles=names,
-                         shape=[no_row_in_plot, no_col_in_plot], colormap=cmap, fig=self._fig)
+            plot_hex_map(codebook.reshape(som.codebook.mapsize + [som.codebook.matrix.shape[-1]]),
+                         titles=names, shape=[no_row_in_plot, no_col_in_plot], colormap=cmap,
+                         fig=self._fig)
 
 
 class View2DPacked(MapView):
diff --git a/sompy/visualization/plot_tools.py b/sompy/visualization/plot_tools.py
index e54a676..8a96c38 100644
--- a/sompy/visualization/plot_tools.py
+++ b/sompy/visualization/plot_tools.py
@@ -6,8 +6,8 @@ from mpl_toolkits.axes_grid1 import make_axes_locatable
 
 
-def plot_hex_map(d_matrix, titles=[], colormap=cm.gray, shape=[1, 1], comp_width=5, hex_shrink=1.0, fig=None,
-                 colorbar=True):
+def plot_hex_map(d_matrix, titles=[], colormap=cm.gray, shape=[1, 1], comp_width=5,
+                 hex_shrink=1.0, fig=None, colorbar=True):
     """
     Plot hexagon map where each neuron is represented by a hexagon. The hexagon
     color is given by the distance between the neurons (D-Matrix)
 
@@ -25,7 +25,8 @@ def plot_hex_map(d_matrix, titles=[], colormap=cm.gray, shape=[1, 1], comp_width
 
     def create_grid_coordinates(x, y):
         coordinates = [x for row in -1 * np.array(list(range(x))) for x in
-                       list(zip(np.arange(((row) % 2) * 0.5, y + ((row) % 2) * 0.5), [0.8660254 * (row)] * y))]
+                       list(zip(np.arange(((row) % 2) * 0.5, y + ((row) % 2) * 0.5),
+                                [0.8660254 * (row)] * y))]
         return (np.array(list(reversed(coordinates))), x, y)
 
     if d_matrix.ndim < 3:
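
Usage note (not part of the patch): a minimal sketch of how the entry points touched
above are typically exercised. It assumes the SOMFactory.build helper and the
BmuHitsView constructor as shown in the sompy README (neither is modified here) and a
synthetic random dataset; only train(), calculate_topographic_error(),
calculate_quantization_error() and BmuHitsView.show() appear in the diff itself.

    import numpy as np
    from sompy import SOMFactory
    from sompy.visualization.bmuhits import BmuHitsView

    # Placeholder data: 1000 samples, 4 features (not from the patch).
    data = np.random.rand(1000, 4)

    # Build a SOM (SOMFactory.build is assumed from the README) and train it with
    # the parameters whose docstrings and call sites this patch re-wraps.
    sm = SOMFactory.build(data, mapsize=[20, 20], lattice='rect', initialization='pca')
    sm.train(n_job=1, verbose='info', train_rough_len=5, train_finetune_len=10,
             train_len_factor=0.5, maxtrainlen=100)

    # Map-quality metrics whose formatting is cleaned up in sompy/sompy.py.
    print("topographic error:", sm.calculate_topographic_error())
    print("quantization error:", sm.calculate_quantization_error())

    # Hit-count view from sompy/visualization/bmuhits.py; the constructor arguments
    # follow the README example and are an assumption of this sketch.
    vhts = BmuHitsView(10, 10, "Hits Map", text_size=8)
    vhts.show(sm, anotate=True, onlyzeros=False, labelsize=7, cmap="jet", logaritmic=False)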