1
+ import numpy as np
2
+ import time , os , sys
3
+ import matplotlib .pyplot as plt
4
+ import seaborn as sns
5
+ import pandas as pd
6
+
7
+ from scipy .stats import norm , gaussian_kde
8
+ from statsmodels .tsa .ar_model import AutoReg
9
+ from sklearn .mixture import GaussianMixture
10
+
11
# Kernel Density Estimation (KDE) of paired ingoing/outgoing columns
12
def plot_kde_seaborn(df, filename, column, output_dir):
    """Plot, save and show overlaid KDE curves for an ingoing/outgoing pair.

    Reads ``df[f"{column}_ingoing"]`` and ``df[f"{column}_outgoing"]``, draws
    both as filled seaborn KDE curves on one 12x6 figure, saves the figure as
    ``<filename>_<column>_kde_sns.svg`` inside ``output_dir`` and then shows it.

    Args:
        df: Table holding both ``<column>_ingoing`` and ``<column>_outgoing``
            (presumably a pandas DataFrame, as passed by ``main``).
        filename: Prefix used for the saved SVG's file name.
        column: Shared prefix of the ingoing/outgoing column pair.
        output_dir: Directory the SVG file is written to.

    Raises:
        KeyError: If either column of the pair is missing (assuming ``df``
            is a pandas DataFrame).
    """
    # The suffix attaches directly to the prefix: callers derive `column` by
    # splitting names such as `<a>_<b>_ingoing` on "_", so any extra
    # whitespace here would never match an existing column.
    ingoing_column_name = f'{column}_ingoing'
    outgoing_column_name = f'{column}_outgoing'

    # Resolve both series before a figure exists, so a missing column cannot
    # leave an empty figure behind.
    ingoing = df[ingoing_column_name]
    outgoing = df[outgoing_column_name]

    fig = plt.figure(figsize=(12, 6))
    try:
        sns.kdeplot(ingoing, label=ingoing_column_name, fill=True)
        sns.kdeplot(outgoing, label=outgoing_column_name, fill=True)
        plt.xlabel(f'{column}_ingoing|outgoing')
        plt.title("Kernel Density Estimation (KDE)")
        plt.legend()

        plot_filename = f'{filename}_{column}_kde_sns.svg'
        # Save before show(): the figure may be gone once the window closes.
        plt.savefig(os.path.join(output_dir, plot_filename))
        plt.show()
    finally:
        # Close this figure even when plotting or saving raises; otherwise a
        # caller that catches the error and keeps going accumulates open
        # figures for the rest of the run.
        plt.close(fig)
32
+
33
def main(in_csv, out_dir):
    """Plot KDEs for the ingoing/outgoing column pairs of every CSV in a directory.

    Each file directly inside ``in_csv`` whose name ends in ``.csv`` is loaded
    with pandas and its ``timestamp`` column is parsed to datetimes. Every
    column that is neither a bookkeeping column nor entirely zero is reduced
    to its first two ``_``-separated parts, and that prefix is handed to
    ``plot_kde_seaborn`` once per file.

    Args:
        in_csv: Directory scanned for ``.csv`` files.
        out_dir: Directory passed on to ``plot_kde_seaborn`` for its output.

    Raises:
        KeyError: If a CSV file has no ``timestamp`` column.
    """
    # Bookkeeping columns that are never plotted.
    always_bypassed = ('timestamp', 'time_diff', 'source_ip', 'destination_ip')

    # Use the parameter, not a module-level global, so the function also works
    # when it is imported and called directly.
    for filename in os.listdir(in_csv):
        if not filename.endswith(".csv"):
            continue

        filename_without_ext, _ = os.path.splitext(filename)
        print(f"CSV File:\t {filename_without_ext} ")

        data = pd.read_csv(os.path.join(in_csv, filename))
        data['timestamp'] = pd.to_datetime(data['timestamp'])

        # Rebuilt for every file: a column that is all zeros in one capture
        # must not suppress the same column in the next capture.
        columns_bypass = set(always_bypassed)
        columns_bypass.update(
            column
            for column in data.columns
            if column not in always_bypassed and (data[column] == 0).all()
        )

        # `<prefix>_ingoing` and `<prefix>_outgoing` reduce to the same
        # prefix; remember what was plotted so each pair is drawn only once.
        plotted = set()
        for column in data.columns:
            if column in columns_bypass:
                continue

            protocol_name = "_".join(column.split("_", 2)[:2])
            if protocol_name in plotted:
                continue
            plotted.add(protocol_name)

            try:
                plot_kde_seaborn(data, filename_without_ext, protocol_name, out_dir)
            except (KeyError, ValueError) as e:
                # KeyError: the column has no ingoing/outgoing pair to look
                # up. Report it and continue with the remaining columns.
                print(f"Could not plot KDE for column {column} : {e} ")
56
+
57
if __name__ == "__main__":
    # Command-line entry point: <script> <csv_dir> <out_dir> <is_malware>.
    print("[" + __file__ + "]'s last modified: %s" % time.ctime(os.path.getmtime(__file__)))

    # Exactly three arguments are expected after the script name.
    if len(sys.argv) != 4:
        print("No input directory and output directory provided.")
        # sys.exit rather than the site-provided exit(), with a non-zero
        # status so calling shells can detect the failure.
        sys.exit(1)

    in_dir = sys.argv[1]
    # isdir (not exists): a regular file is not a usable input directory.
    if not os.path.isdir(in_dir):
        print(f"Directory: '{in_dir} ' does not exist.")
        sys.exit(1)
    print(f"\n CSV Directory:\t \t {in_dir} ")

    out_dir = sys.argv[2]
    # exist_ok=True already tolerates an existing directory.
    os.makedirs(out_dir, exist_ok=True)
    print(f"SVG Files will save:\t {out_dir} ")

    # Only echoed here; nothing in this block acts on the flag.
    IS_MALWARE = sys.argv[3]
    print(f"DATASET is malware:\t {IS_MALWARE} \n \n ")

    main(in_dir, out_dir)
0 commit comments