
Commit

reproduce grid values last version
agiovann committed Oct 8, 2018
1 parent c402c67 commit cbcf95e
Showing 4 changed files with 113 additions and 112 deletions.
4 changes: 2 additions & 2 deletions SLURM/slurmStart.sh
@@ -7,7 +7,7 @@ profile="${SLURM_JOBID}_profile"
clog="ipcontroller_${SLURM_JOBID}.log"
#/mnt/xfs1/bioinfoCentos7/software/installs/python/2.7.10/bin/ipcontroller --profile=${profile} --ipython-dir=${pdir} --ip='*' > ${clog} 2>&1 &
#/mnt/xfs1/home/agiovann/anaconda2/envs/CNMF/bin/ipcontroller --profile=${profile} --ipython-dir=${pdir} --ip='*' > ${clog} 2>&1 &
/mnt/xfs1/home/agiovann/anaconda3/envs/caiman_0918/bin/ipcontroller --location=$(hostname -i) --profile=${profile} --ipython-dir=${pdir} --ip='*' > ${clog} 2>&1 &
/mnt/xfs1/home/agiovann/anaconda3/envs/caiman/bin/ipcontroller --location=$(hostname -i) --profile=${profile} --ipython-dir=${pdir} --ip='*' > ${clog} 2>&1 &
cpid=$!

started=0
@@ -19,7 +19,7 @@ done

[[ ${started} == 1 ]] || { echo "ipcontroller took too long to start. Exiting." ; exit 1 ; }

srun bash -c '/mnt/xfs1/home/agiovann/anaconda3/envs/caiman_0918/bin/ipengine --profile='${profile}' --ipython-dir='${pdir}' > ipengine_${SLURM_JOBID}_${SLURM_PROCID}.log 2>&1' &
srun bash -c '/mnt/xfs1/home/agiovann/anaconda3/envs/caiman/bin/ipengine --profile='${profile}' --ipython-dir='${pdir}' > ipengine_${SLURM_JOBID}_${SLURM_PROCID}.log 2>&1' &
#srun bash -c '/mnt/xfs1/bioinfoCentos7/software/installs/python/2.7.10/bin/ipengine --profile='${profile}' --ipython-dir='${pdir}' > ipengine_${SLURM_JOBID}_${SLURM_PROCID}.log 2>&1 &'

started=0
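For orientation, the lines below are a minimal sketch (not part of the commit) of how a Python job could attach to the ipcontroller/ipengine cluster that slurmStart.sh brings up. The profile name mirrors the ${SLURM_JOBID}_profile variable above; the ipython_dir placeholder stands in for ${pdir}, whose value is defined outside this hunk, and the exact ipyparallel keyword arguments are an assumption about its client API.

import os
import ipyparallel as ipp

profile = '{}_profile'.format(os.environ['SLURM_JOBID'])   # same naming as ${SLURM_JOBID}_profile
ipython_dir = '/path/to/pdir'                              # placeholder for ${pdir} in the script
client = ipp.Client(profile=profile, ipython_dir=ipython_dir)
print('engines available:', len(client.ids))
print(client[:].apply_sync(os.getpid))                     # one pid per running ipengine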
14 changes: 8 additions & 6 deletions use_cases/CaImAnpaper/scripts_paper/Preprocess_batch.py
@@ -58,9 +58,9 @@

print_figs = True
skip_refinement = False
backend_patch = 'SLURM'
backend_refine = 'SLURM'
n_processes = 56
backend_patch = 'multiprocessing'
backend_refine = 'multiprocessing'
n_processes = 28
base_folder = '/mnt/ceph/neuro/DataForPublications/DATA_PAPER_ELIFE/'
n_pixels_per_process = 6000
block_size = 6000
@@ -178,6 +178,7 @@
# 'fname': '/opt/local/Data/labeling/neurofinder.02.00/Yr_d1_512_d2_512_d3_1_order_C_frames_8000_.mmap',
'fname': 'N.02.00/Yr_d1_512_d2_512_d3_1_order_C_frames_8000_.mmap',
'gtname': 'N.02.00/joined_consensus_active_regions.npy',
'tiffname': '/mnt/ceph/neuro/labeling/neurofinder.02.00/images/mmap_tifs/images_all_rig__d1_512_d2_512_d3_1_order_F_frames_8000_.tif',
# order of the autoregressive system
'merge_thresh': 0.8, # merging threshold, max correlation allow
'rf': 20, # half-size of the patches in pixels. rf=25, patches are 50x50 20
@@ -246,9 +247,10 @@
}
params_movies.append(params_movie.copy())




#%%
from scipy.io import savemat
for idx, params_movie in enumerate(params_movies):
savemat('/mnt/home/agiovann/SOFTWARE/CaImAn-MATLAB/file_parameters_' + str(idx) + '.mat', params_movie)
# %%
all_perfs = []
all_rvalues = []
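A quick way to sanity-check the .mat export added above is to read one file back with scipy.io.loadmat. This is a hedged sketch, not part of the commit; the index 0 and the printed key are illustrative, with the path pattern copied from the savemat call.

from scipy.io import loadmat

idx = 0  # illustrative index; one file is written per entry of params_movies
mat = loadmat('/mnt/home/agiovann/SOFTWARE/CaImAn-MATLAB/file_parameters_' + str(idx) + '.mat',
              squeeze_me=True)
print(sorted(k for k in mat if not k.startswith('__')))  # dict keys become MATLAB variable names
print(mat['merge_thresh'])                               # e.g. the merging threshold stored for this dataset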
108 changes: 4 additions & 104 deletions use_cases/CaImAnpaper/scripts_paper/batch_grid_params_search.py
@@ -7,6 +7,7 @@
@author: agiovann
"""
import cv2
from dask.dataframe.core import idxmaxmin_agg

try:
cv2.setNumThreads(1)
@@ -40,7 +41,7 @@
# %% ANALYSIS MODE AND PARAMETERS
preprocessing_from_scratch = False
plot_on = False
save_grid = False
save_grid = True


try:
@@ -503,109 +504,8 @@


#%%
if False:
if save_grid:
np.savez('/mnt/ceph/neuro/DataForPublications/DATA_PAPER_ELIFE/ALL_RECORDS_GRID_FINAL.npz', records=records)
#%%
with np.load('/mnt/ceph/neuro/DataForPublications/DATA_PAPER_ELIFE/ALL_RECORDS_GRID_FINAL.npz') as ld:
records = ld['records'][()]
records = [list(rec) for rec in records]
#%% Max of all datasets
df = DataFrame(records)
df.columns = ['name', 'gr_snr', 'grid_rval', 'grid_max_prob_rej', 'grid_thresh_CNN', 'recall',
'precision', 'f1_score']
best_res = df.groupby(by=['name'])
best_res = best_res.describe()
max_caiman_batch = best_res['f1_score']['max']
print(max_caiman_batch)
#%%
df = DataFrame(records)
df.columns = ['name', 'gr_snr', 'grid_rval', 'grid_max_prob_rej', 'grid_thresh_CNN','recall', 'precision', 'f1_score']
best_res = df.groupby(by=['gr_snr', 'grid_rval', 'grid_max_prob_rej', 'grid_thresh_CNN'])
best_res = best_res.describe()
print(best_res.loc[:, 'f1_score'].max())
#%%
df = DataFrame(records)
df.columns = ['name', 'gr_snr', 'grid_rval', 'grid_max_prob_rej', 'grid_thresh_CNN','recall', 'precision', 'f1_score']
best_res = df.groupby(by=['gr_snr', 'grid_rval', 'grid_max_prob_rej', 'grid_thresh_CNN'])
best_res = best_res.describe()
pars = best_res.loc[:, 'f1_score'].idxmax()['mean']
print(pars)
df_result = df[((df['gr_snr'] == pars[0]) & (df['grid_rval'] == pars[1]) & (df['grid_max_prob_rej'] == pars[2]) & (df['grid_thresh_CNN'] == pars[3]))]

print(df_result.sort_values(by='name')[['name','precision','recall','f1_score']])
print(df_result.mean())
#%%
df_result = df_result.sort_values(by='name')
max_res = df.groupby(by=['name'])
max_res = max_res.describe()
max_res = max_res.sort_values(by='name')
max_res = max_res['f1_score']['max']
df_result['f1_score_max'] = max_res.values
min_res = df.groupby(by=['name'])
min_res = min_res.describe()
min_res = min_res.sort_values(by='name')
min_res = min_res['f1_score']['min']
df_result['f1_score_min'] = min_res.values


names = ['N.03.00.t',
'N.04.00.t',
'YST',
'N.00.00',
'N.02.00',
'N.01.01',
'K53',
'J115',
'J123']

idx_sort = np.argsort(names)
df_result['L1_f1'] = np.array([np.nan, np.nan, 0.78, np.nan, 0.89, 0.8, 0.89, 0.85, np.nan])[idx_sort] # Human 1
df_result['L2_f1'] = np.array([0.9, 0.69, 0.9, 0.92, 0.87, 0.89, 0.92, 0.93, 0.83])[idx_sort] # Human 2
df_result['L3_f1'] = np.array([0.85, 0.75, 0.82, 0.83, 0.84, 0.78, 0.93, 0.94, 0.9])[idx_sort] # Human 3
df_result['L4_f1'] = np.array([0.78, 0.87, 0.79, 0.87, 0.82, 0.75, 0.83, 0.83, 0.91])[idx_sort]

# df_result['L1_precision'] = np.array([np.nan, np.nan, 0.78, np.nan, 0.89, 0.8, 0.89, 0.85, np.nan])[idx_sort] # Human 1
# df_result['L2_precision'] = np.array([0.9, 0.69, 0.9, 0.92, 0.87, 0.89, 0.92, 0.93, 0.83])[idx_sort] # Human 2
# df_result['L3_precision'] = np.array([0.85, 0.75, 0.82, 0.83, 0.84, 0.78, 0.93, 0.94, 0.9])[idx_sort] # Human 3
# df_result['L4_precision'] = np.array([0.78, 0.87, 0.79, 0.87, 0.82, 0.75, 0.83, 0.83, 0.91])[idx_sort]
#
# df_result['L1_recall'] = np.array([np.nan, np.nan, 0.78, np.nan, 0.89, 0.8, 0.89, 0.85, np.nan])[idx_sort] # Human 1
# df_result['L2_recall'] = np.array([0.9, 0.69, 0.9, 0.92, 0.87, 0.89, 0.92, 0.93, 0.83])[idx_sort] # Human 2
# df_result['L3_recall'] = np.array([0.85, 0.75, 0.82, 0.83, 0.84, 0.78, 0.93, 0.94, 0.9])[idx_sort] # Human 3
# df_result['L4_recall'] = np.array([0.78, 0.87, 0.79, 0.87, 0.82, 0.75, 0.83, 0.83, 0.91])[idx_sort]




df_result['f1_score_CaImAn_online'] = [0.81, 0.81, 0.82, 0.69, 0.72, 0.78, 0.75, 0.67, 0.72]
df_result['precision_CaImAn_online'] = [0.75, 0.79, 0.80, 0.84, 0.75, 0.8, 0.77, 0.65, 0.75]
df_result['recall_CaImAn_online'] = [0.88, 0.82, 0.84, 0.58, 0.69, 0.76, 0.73, 0.7, 0.69]

ax = df_result.plot(x='name', y=['f1_score', 'f1_score_CaImAn_online','L4_f1','L3_f1','L2_f1','L1_f1'], xticks=range(len(df_result)),
kind='bar', color=[[1,0,0],[0,0,1],[.5,.5,.5],[.6,.6,.6],[.7,.7,.7],[.8,.8,.8]])
ax.set_xticklabels(df_result.name, rotation=45)
pl.legend(['CaImAn batch','CaImAn online','L4','L3','L2','L1'])
# ax.set_xticklabels(df_result.name)
# pl.xlabel('Dataset')
pl.ylabel('F1 score')
pl.ylim([0.55,0.95])
params_display = {
'downsample_ratio': .2,
'thr_plot': 0.8
}

pl.rcParams['pdf.fonttype'] = 42
font = {'family': 'Arial',
'weight': 'regular',
'size': 20}
pl.rc('font', **font)

# with open('mytable.tex', 'w') as tf:
# tf.write(df_result[['name','precision','recall','f1_score','f1_score_caiman']].round(2).to_latex())
#%% max versus average
all_labels = np.vstack([df_result['L1_f1'],df_result['L2_f1'], df_result['L3_f1'] , df_result['L4_f1']])
mean_labels = np.nanmean(all_labels,0).T
df_cm = DataFrame({'Human average':mean_labels,'CaImAn batch':max_caiman_batch})
df_cm.plot(kind='bar')
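The parameter selection above relies on a pandas pattern that is easy to miss: group the grid records by the parameter tuple, describe() them, and take idxmax()['mean'] to get the combination with the best mean f1_score across datasets. A self-contained toy example (made-up numbers, not from the repository):

import pandas as pd

toy = pd.DataFrame({
    'name':      ['A', 'B', 'A', 'B'],        # two datasets ...
    'gr_snr':    [1.5, 1.5, 2.0, 2.0],        # ... evaluated on two parameter combinations
    'grid_rval': [0.80, 0.80, 0.70, 0.70],
    'f1_score':  [0.70, 0.75, 0.80, 0.78],
})
stats = toy.groupby(by=['gr_snr', 'grid_rval']).describe()
best_pars = stats.loc[:, 'f1_score'].idxmax()['mean']  # parameter tuple maximizing mean f1_score
print(best_pars)                                       # -> (2.0, 0.7) for these toy numbers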



99 changes: 99 additions & 0 deletions use_cases/CaImAnpaper/scripts_paper/figure_4.py
@@ -0,0 +1,99 @@
import numpy as np
from pandas import DataFrame
import pylab as pl
#%% Figure 4b and GRID statistics
with np.load('/mnt/ceph/neuro/DataForPublications/DATA_PAPER_ELIFE/ALL_RECORDS_GRID_FINAL.npz') as ld:
records = ld['records'][()]
records = [list(rec) for rec in records]
records = [rec[:5]+[float(rr) for rr in rec[5:]] for rec in records]
#%% Max of all datasets
df = DataFrame(records)
df.columns = ['name', 'gr_snr', 'grid_rval', 'grid_max_prob_rej', 'grid_thresh_CNN', 'recall',
'precision', 'f1_score']
best_res = df.groupby(by=['name'])
best_res = best_res.describe()
max_caiman_batch = best_res['f1_score']['max']
print(max_caiman_batch)
print(max_caiman_batch.mean())
print(max_caiman_batch.std())
#%%
df = DataFrame(records)
df.columns = ['name', 'gr_snr', 'grid_rval', 'grid_max_prob_rej', 'grid_thresh_CNN','recall', 'precision', 'f1_score']
best_res = df.groupby(by=['gr_snr', 'grid_rval', 'grid_max_prob_rej', 'grid_thresh_CNN'])
best_res = best_res.describe()
print(best_res.loc[:, 'f1_score'].max())
#%%
df = DataFrame(records)
df.columns = ['name', 'gr_snr', 'grid_rval', 'grid_max_prob_rej', 'grid_thresh_CNN','recall', 'precision', 'f1_score']
best_res = df.groupby(by=['gr_snr', 'grid_rval', 'grid_max_prob_rej', 'grid_thresh_CNN'])
best_res = best_res.describe()
pars = best_res.loc[:, 'f1_score'].idxmax()['mean']
print(pars)
df_result = df[((df['gr_snr'] == pars[0]) & (df['grid_rval'] == pars[1]) & (df['grid_max_prob_rej'] == pars[2]) & (df['grid_thresh_CNN'] == pars[3]))]

print(df_result.sort_values(by='name')[['name','precision','recall','f1_score']])
print(df_result.mean())
print(df_result.std())

#%%
df_result = df_result.sort_values(by='name')
max_res = df.groupby(by=['name'])
max_res = max_res.describe()
max_res = max_res.sort_values(by='name')
max_res = max_res['f1_score']['max']
df_result['f1_score_max'] = max_res.values
min_res = df.groupby(by=['name'])
min_res = min_res.describe()
min_res = min_res.sort_values(by='name')
min_res = min_res['f1_score']['min']
df_result['f1_score_min'] = min_res.values


names = ['N.03.00.t',
'N.04.00.t',
'YST',
'N.00.00',
'N.02.00',
'N.01.01',
'K53',
'J115',
'J123']

idx_sort = np.argsort(names)
df_result['L1_f1'] = np.array([np.nan, np.nan, 0.78, np.nan, 0.89, 0.8, 0.89, 0.85, np.nan])[idx_sort] # Human 1
df_result['L2_f1'] = np.array([0.9, 0.69, 0.9, 0.92, 0.87, 0.89, 0.92, 0.93, 0.83])[idx_sort] # Human 2
df_result['L3_f1'] = np.array([0.85, 0.75, 0.82, 0.83, 0.84, 0.78, 0.93, 0.94, 0.9])[idx_sort] # Human 3
df_result['L4_f1'] = np.array([0.78, 0.87, 0.79, 0.87, 0.82, 0.75, 0.83, 0.83, 0.91])[idx_sort]

df_result['f1_score_CaImAn_online'] = np.array([0.74213836, 0.71713147, 0.78541374, 0.77562327, 0.69266771,
0.74285714, 0.80835509, 0.78950077, 0.83573487])[idx_sort]
#%%
ax = df_result.plot(x='name', y=['f1_score', 'f1_score_CaImAn_online','L4_f1','L3_f1','L2_f1','L1_f1'], xticks=range(len(df_result)),
kind='bar', color=[[1,0,0],[0,0,1],[.5,.5,.5],[.6,.6,.6],[.7,.7,.7],[.8,.8,.8]])

ax.set_xticklabels(df_result.name, rotation=45)
pl.legend(['CaImAn batch','CaImAn online','L4','L3','L2','L1'])
# ax.set_xticklabels(df_result.name)
# pl.xlabel('Dataset')
pl.ylabel('F1 score')
pl.ylim([0.55,0.95])
params_display = {
'downsample_ratio': .2,
'thr_plot': 0.8
}

pl.rcParams['pdf.fonttype'] = 42
font = {'family': 'Arial',
'weight': 'regular',
'size': 20}
pl.rc('font', **font)
pl.close()
#%%
pl.figure()
online_F1_max = np.array([0.75316456, 0.71713147, 0.79427083, 0.79733333, 0.718529,
0.76, 0.84371328, 0.81400438, 0.83965015])[idx_sort]
all_labels = np.vstack([df_result['L1_f1'],df_result['L2_f1'], df_result['L3_f1'] , df_result['L4_f1']])
mean_labels = np.nanmean(all_labels,0).T
df_cm = DataFrame({'Human average':mean_labels,'CaImAn online max':online_F1_max, 'CaImAn online avg': df_result['f1_score_CaImAn_online'].values
,'CaImAn batch max':max_caiman_batch,'CaImAn batch avg': df_result['f1_score'].values})
df_cm.plot(kind='bar')
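One caveat when rerunning figure_4.py under a more recent NumPy (>= 1.16.3, an assumption about the environment rather than anything stated in the commit): np.load defaults to allow_pickle=False there, so loading the pickled object array of records needs the flag set explicitly, for example:

import numpy as np

with np.load('/mnt/ceph/neuro/DataForPublications/DATA_PAPER_ELIFE/ALL_RECORDS_GRID_FINAL.npz',
             allow_pickle=True) as ld:           # required for pickled object arrays in newer NumPy
    records = [list(rec) for rec in ld['records'][()]]
print(len(records), 'grid records loaded')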
