diff --git a/sompy/examples/.ipynb_checkpoints/AirFlights_hexagonal_grid-checkpoint.ipynb b/sompy/examples/.ipynb_checkpoints/AirFlights_hexagonal_grid-checkpoint.ipynb
new file mode 100644
index 0000000..64ef117
--- /dev/null
+++ b/sompy/examples/.ipynb_checkpoints/AirFlights_hexagonal_grid-checkpoint.ipynb
@@ -0,0 +1,694 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Study of airflight delay causes with Self Organizing Maps - Example of hexagonal lattice"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This notebook is intended to be a brief guide on how to use Self Organizing Maps with the **SOMPY** library in Python. We are going to use a hexagonal lattice in this example in order to understand the main causes of airflight cancellations.\n",
+ "\n",
+ "##### Data description\n",
+ "________________________\n",
+ "\n",
+ "The U.S. Department of Transportation's (DOT) Bureau of Transportation Statistics (BTS) tracks the on-time performance of domestic flights operated by large air carriers. Summary information on the number of on-time, delayed, canceled and diverted flights appears in DOT's monthly Air Travel Consumer Report, published about 30 days after the month's end, as well as in summary tables posted on this website. BTS began collecting details on the causes of flight delays in June 2003. Summary statistics and raw data are made available to the public at the time the Air Travel Consumer Report is released.\n",
+ "\n",
+ "This version of the dataset was compiled from the Statistical Computing Statistical Graphics 2009 Data Expo and is also available [here](http://stat-computing.org/dataexpo/2009/the-data.html), [here](https://www.transtats.bts.gov/Fields.asp?Table_ID=236) and [here](https://www.kaggle.com/giovamata/airlinedelaycauses)\n",
+ "\n",
+ "**Fields description**\n",
+ "__________________________\n",
+ "1.\t**Year**:\t2008\n",
+ "2.\t**Month**:\t1-12\n",
+ "3.\t**DayofMonth**:\t1-31\n",
+ "4.\t**DayOfWeek**:\t1 (Monday) - 7 (Sunday)\n",
+ "5.\t**DepTime**:\tactual departure time (local, hhmm)\n",
+ "6.\t**CRSDepTime**:\tscheduled departure time (local, hhmm)\n",
+ "7.\t**ArrTime**:\tactual arrival time (local, hhmm)\n",
+ "8.\t**CRSArrTime**:\tscheduled arrival time (local, hhmm)\n",
+ "9.\t**UniqueCarrier**:\tunique carrier code\n",
+ "10.\t**FlightNum**:\tflight number\n",
+ "11.\t**TailNum**:\tplane tail number\n",
+ "12.\t**ActualElapsedTime**:\tin minutes\n",
+ "13.\t**CRSElapsedTime**:\tin minutes\n",
+ "14.\t**AirTime**:\tin minutes\n",
+ "15.\t**ArrDelay**:\tarrival delay, in minutes\n",
+ "16.\t**DepDelay**:\tdeparture delay, in minutes\n",
+ "17.\t**Origin**:\torigin IATA airport code\n",
+ "18.\t**Dest**:\tdestination IATA airport code\n",
+ "19.\t**Distance**:\tin miles\n",
+ "20.\t**TaxiIn**:\ttaxi in time, in minutes\n",
+ "21.\t**TaxiOut**:\ttaxi out time, in minutes\n",
+ "22.\t**Cancelled**:\twas the flight cancelled?\n",
+ "23.\t**CancellationCode**:\treason for cancellation (A = carrier, B = weather, C = NAS, D = security)\n",
+ "24.\t**Diverted**:\t1 = yes, 0 = no\n",
+ "25.\t**CarrierDelay**:\tin minutes\n",
+ "26.\t**WeatherDelay**:\tin minutes\n",
+ "27.\t**NASDelay**:\tNational Air System delay in minutes\n",
+ "28.\t**SecurityDelay**:\tin minutes\n",
+ "29.\t**LateAircraftDelay**:\tin minutes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "backend module://ipykernel.pylab.backend_inline version unknown\n"
+ ]
+ }
+ ],
+ "source": [
+ "%matplotlib inline\n",
+ "import math\n",
+ "import glob\n",
+ "import matplotlib.pyplot as plt\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import urllib3\n",
+ "from sklearn.externals import joblib\n",
+ "import random\n",
+ "import matplotlib\n",
+ "from sompy.sompy import SOMFactory\n",
+ "from sompy.visualization.plot_tools import plot_hex_map\n",
+ "import logging\n"
+ ]
+ },
+ {
+ "cell_type": "raw",
+ "metadata": {},
+ "source": [
+ "!kaggle datasets download -d giovamata/airlinedelaycauses\n",
+ "!unzip airlinedelaycauses.zip"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Data Processing"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = pd.read_csv(\"./DelayedFlights.csv\")\n",
+ "\n",
+ "df = df[[\"Month\",\"DayofMonth\", \"DayOfWeek\",\"DepTime\", \"AirTime\",\n",
+ " \"Distance\", \"SecurityDelay\",\"WeatherDelay\", \"NASDelay\", \"CarrierDelay\",\n",
+ " \"ArrDelay\", \"DepDelay\", \"LateAircraftDelay\", \"Cancelled\"]]\n",
+ "clustering_vars = [\"Month\", \"DayofMonth\", \"DepTime\", \"AirTime\", \n",
+ " \"LateAircraftDelay\", \"DepDelay\", \"ArrDelay\", \"CarrierDelay\"]\n",
+ "df = df.fillna(0)\n",
+ "data = df[clustering_vars].values\n",
+ "names = clustering_vars"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Month | \n",
+ " DayofMonth | \n",
+ " DayOfWeek | \n",
+ " DepTime | \n",
+ " AirTime | \n",
+ " Distance | \n",
+ " SecurityDelay | \n",
+ " WeatherDelay | \n",
+ " NASDelay | \n",
+ " CarrierDelay | \n",
+ " ArrDelay | \n",
+ " DepDelay | \n",
+ " LateAircraftDelay | \n",
+ " Cancelled | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 1.936758e+06 | \n",
+ " 1.936758e+06 | \n",
+ " 1.936758e+06 | \n",
+ " 1.936758e+06 | \n",
+ " 1.936758e+06 | \n",
+ " 1.936758e+06 | \n",
+ " 1.936758e+06 | \n",
+ " 1.936758e+06 | \n",
+ " 1.936758e+06 | \n",
+ " 1.936758e+06 | \n",
+ " 1.936758e+06 | \n",
+ " 1.936758e+06 | \n",
+ " 1.936758e+06 | \n",
+ " 1.936758e+06 | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " 6.111106e+00 | \n",
+ " 1.575347e+01 | \n",
+ " 3.984827e+00 | \n",
+ " 1.518534e+03 | \n",
+ " 1.078083e+02 | \n",
+ " 7.656862e+02 | \n",
+ " 5.805836e-02 | \n",
+ " 2.385512e+00 | \n",
+ " 9.675607e+00 | \n",
+ " 1.235367e+01 | \n",
+ " 4.201714e+01 | \n",
+ " 4.318518e+01 | \n",
+ " 1.629374e+01 | \n",
+ " 3.268348e-04 | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " 3.482546e+00 | \n",
+ " 8.776272e+00 | \n",
+ " 1.995966e+00 | \n",
+ " 4.504853e+02 | \n",
+ " 6.886184e+01 | \n",
+ " 5.744797e+02 | \n",
+ " 1.623934e+00 | \n",
+ " 1.734036e+01 | \n",
+ " 2.808958e+01 | \n",
+ " 3.613493e+01 | \n",
+ " 5.672935e+01 | \n",
+ " 5.340250e+01 | \n",
+ " 3.585904e+01 | \n",
+ " 1.807562e-02 | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " 1.000000e+00 | \n",
+ " 1.000000e+00 | \n",
+ " 1.000000e+00 | \n",
+ " 1.000000e+00 | \n",
+ " 0.000000e+00 | \n",
+ " 1.100000e+01 | \n",
+ " 0.000000e+00 | \n",
+ " 0.000000e+00 | \n",
+ " 0.000000e+00 | \n",
+ " 0.000000e+00 | \n",
+ " -1.090000e+02 | \n",
+ " 6.000000e+00 | \n",
+ " 0.000000e+00 | \n",
+ " 0.000000e+00 | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " 3.000000e+00 | \n",
+ " 8.000000e+00 | \n",
+ " 2.000000e+00 | \n",
+ " 1.203000e+03 | \n",
+ " 5.800000e+01 | \n",
+ " 3.380000e+02 | \n",
+ " 0.000000e+00 | \n",
+ " 0.000000e+00 | \n",
+ " 0.000000e+00 | \n",
+ " 0.000000e+00 | \n",
+ " 9.000000e+00 | \n",
+ " 1.200000e+01 | \n",
+ " 0.000000e+00 | \n",
+ " 0.000000e+00 | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " 6.000000e+00 | \n",
+ " 1.600000e+01 | \n",
+ " 4.000000e+00 | \n",
+ " 1.545000e+03 | \n",
+ " 9.000000e+01 | \n",
+ " 6.060000e+02 | \n",
+ " 0.000000e+00 | \n",
+ " 0.000000e+00 | \n",
+ " 0.000000e+00 | \n",
+ " 0.000000e+00 | \n",
+ " 2.400000e+01 | \n",
+ " 2.400000e+01 | \n",
+ " 0.000000e+00 | \n",
+ " 0.000000e+00 | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " 9.000000e+00 | \n",
+ " 2.300000e+01 | \n",
+ " 6.000000e+00 | \n",
+ " 1.900000e+03 | \n",
+ " 1.370000e+02 | \n",
+ " 9.980000e+02 | \n",
+ " 0.000000e+00 | \n",
+ " 0.000000e+00 | \n",
+ " 6.000000e+00 | \n",
+ " 1.000000e+01 | \n",
+ " 5.500000e+01 | \n",
+ " 5.300000e+01 | \n",
+ " 1.800000e+01 | \n",
+ " 0.000000e+00 | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " 1.200000e+01 | \n",
+ " 3.100000e+01 | \n",
+ " 7.000000e+00 | \n",
+ " 2.400000e+03 | \n",
+ " 1.091000e+03 | \n",
+ " 4.962000e+03 | \n",
+ " 3.920000e+02 | \n",
+ " 1.352000e+03 | \n",
+ " 1.357000e+03 | \n",
+ " 2.436000e+03 | \n",
+ " 2.461000e+03 | \n",
+ " 2.467000e+03 | \n",
+ " 1.316000e+03 | \n",
+ " 1.000000e+00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Month DayofMonth DayOfWeek DepTime AirTime \\\n",
+ "count 1.936758e+06 1.936758e+06 1.936758e+06 1.936758e+06 1.936758e+06 \n",
+ "mean 6.111106e+00 1.575347e+01 3.984827e+00 1.518534e+03 1.078083e+02 \n",
+ "std 3.482546e+00 8.776272e+00 1.995966e+00 4.504853e+02 6.886184e+01 \n",
+ "min 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 0.000000e+00 \n",
+ "25% 3.000000e+00 8.000000e+00 2.000000e+00 1.203000e+03 5.800000e+01 \n",
+ "50% 6.000000e+00 1.600000e+01 4.000000e+00 1.545000e+03 9.000000e+01 \n",
+ "75% 9.000000e+00 2.300000e+01 6.000000e+00 1.900000e+03 1.370000e+02 \n",
+ "max 1.200000e+01 3.100000e+01 7.000000e+00 2.400000e+03 1.091000e+03 \n",
+ "\n",
+ " Distance SecurityDelay WeatherDelay NASDelay CarrierDelay \\\n",
+ "count 1.936758e+06 1.936758e+06 1.936758e+06 1.936758e+06 1.936758e+06 \n",
+ "mean 7.656862e+02 5.805836e-02 2.385512e+00 9.675607e+00 1.235367e+01 \n",
+ "std 5.744797e+02 1.623934e+00 1.734036e+01 2.808958e+01 3.613493e+01 \n",
+ "min 1.100000e+01 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 \n",
+ "25% 3.380000e+02 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 \n",
+ "50% 6.060000e+02 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 \n",
+ "75% 9.980000e+02 0.000000e+00 0.000000e+00 6.000000e+00 1.000000e+01 \n",
+ "max 4.962000e+03 3.920000e+02 1.352000e+03 1.357000e+03 2.436000e+03 \n",
+ "\n",
+ " ArrDelay DepDelay LateAircraftDelay Cancelled \n",
+ "count 1.936758e+06 1.936758e+06 1.936758e+06 1.936758e+06 \n",
+ "mean 4.201714e+01 4.318518e+01 1.629374e+01 3.268348e-04 \n",
+ "std 5.672935e+01 5.340250e+01 3.585904e+01 1.807562e-02 \n",
+ "min -1.090000e+02 6.000000e+00 0.000000e+00 0.000000e+00 \n",
+ "25% 9.000000e+00 1.200000e+01 0.000000e+00 0.000000e+00 \n",
+ "50% 2.400000e+01 2.400000e+01 0.000000e+00 0.000000e+00 \n",
+ "75% 5.500000e+01 5.300000e+01 1.800000e+01 0.000000e+00 \n",
+ "max 2.461000e+03 2.467000e+03 1.316000e+03 1.000000e+00 "
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.describe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Model Training "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As the dataset is relatively large, the model takes some time to train. We didn't fine-tune the hyperparameters of the algorithm, so this is a potential area for improvement."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%time\n",
+ "# Train the model many times with randomly chosen map sizes (the more runs, the better). Each trained model is stored on disk for further study\n",
+ "for i in range(1000):\n",
+ " sm = SOMFactory().build(data, mapsize=[random.choice(list(range(15, 25))), \n",
+ " random.choice(list(range(10, 15)))],\n",
+ " normalization = 'var', initialization='random', component_names=names, lattice=\"hexa\")\n",
+ " sm.train(n_job=4, verbose=False, train_rough_len=30, train_finetune_len=100)\n",
+ " joblib.dump(sm, \"model_{}.joblib\".format(i))\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "scrolled": false
+ },
+ "outputs": [],
+ "source": [
+ "# Study the models trained and plot the errors obtained in order to select the best one\n",
+ "models_pool = glob.glob(\"./model*\")\n",
+ "errors=[]\n",
+ "for model_filepath in models_pool:\n",
+ " sm = joblib.load(model_filepath)\n",
+ " topographic_error = sm.calculate_topographic_error()\n",
+ " quantization_error = sm.calculate_quantization_error()\n",
+ " errors.append((topographic_error, quantization_error))\n",
+ "e_top, e_q = zip(*errors)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ "