Skip to content

Commit 96f64b2

Browse files
Create Kernel Density Estimation.py
Basic KDE plot
1 parent 266510e commit 96f64b2

File tree

1 file changed

+76
-0
lines changed

1 file changed

+76
-0
lines changed
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,76 @@
1+
import numpy as np
2+
import time, os, sys
3+
import matplotlib.pyplot as plt
4+
import seaborn as sns
5+
import pandas as pd
6+
7+
from scipy.stats import norm, gaussian_kde
8+
from statsmodels.tsa.ar_model import AutoReg
9+
from sklearn.mixture import GaussianMixture
10+
11+
#Kernel Density Estimation (KDE)
12+
def plot_kde_seaborn(df, filename, column, output_dir):
    """Plot overlaid KDE curves for a column's ingoing/outgoing pair and save as SVG.

    The two series ``f'{column}_ingoing'`` and ``f'{column}_outgoing'`` are
    read from ``df``, drawn on one figure, written to
    ``<output_dir>/<filename>_<column>_kde_sns.svg`` and then shown.

    Args:
        df: Table indexed by column name (main() passes a pandas DataFrame).
        filename: Prefix used when building the output file name.
        column: Base column name, without the ``_ingoing``/``_outgoing`` suffix.
        output_dir: Directory the SVG file is written into.

    Raises:
        KeyError: If either of the two columns is missing from ``df``
            (assuming pandas-style lookup).
    """
    ingoing_column_name = f'{column}_ingoing'
    outgoing_column_name = f'{column}_outgoing'

    # Look both columns up before creating the figure, so a missing column
    # fails without leaving an empty figure behind.
    ingoing = df[ingoing_column_name]
    outgoing = df[outgoing_column_name]

    fig = plt.figure(figsize=(12, 6))
    try:
        sns.kdeplot(ingoing, label=f'{column}_ingoing', fill=True)
        sns.kdeplot(outgoing, label=f'{column}_outgoing', fill=True)
        plt.xlabel(f'{column}_ingoing|outgoing')
        plt.title("Kernel Density Estimation (KDE)")
        plt.legend()
        plot_filename = f'{filename}_{column}_kde_sns.svg'

        plt.savefig(os.path.join(output_dir, plot_filename))
        plt.show()
    finally:
        # Always release the figure: callers catch plotting errors and keep
        # going, so an unclosed figure would otherwise accumulate per failure.
        plt.close(fig)
32+
33+
def main(in_csv, out_dir):
    """Plot one KDE figure per ingoing/outgoing column pair for each CSV in a directory.

    Every ``*.csv`` file directly inside ``in_csv`` is loaded, its
    ``timestamp`` column is parsed to datetimes, and each remaining column
    that is not skipped is reduced to its first two underscore-separated
    parts (e.g. ``a_b_ingoing`` -> ``a_b``). That base name is handed to
    ``plot_kde_seaborn`` once per file.

    Columns are skipped when they are one of ``timestamp``, ``time_diff``,
    ``source_ip``, ``destination_ip``, or when every value in the column
    equals 0 in the current file.

    Args:
        in_csv: Path of the directory to scan for CSV files (a directory,
            despite the parameter name).
        out_dir: Directory forwarded to ``plot_kde_seaborn`` for the output.

    Raises:
        KeyError: If a CSV file has no ``timestamp`` column.
    """
    always_skipped = ('timestamp', 'time_diff', 'source_ip', 'destination_ip')

    for filename in os.listdir(in_csv):
        if not filename.endswith(".csv"):
            continue

        filename_without_ext, _ = os.path.splitext(filename)
        print(f"CSV File:\t{filename_without_ext}")

        data = pd.read_csv(os.path.join(in_csv, filename))
        data['timestamp'] = pd.to_datetime(data['timestamp'])

        # Build the skip set per file: an all-zero column in one CSV must not
        # suppress the same column in the CSV files processed after it.
        columns_bypass = set(always_skipped)
        columns_bypass.update(
            column
            for column in data.columns
            if column not in always_skipped and (data[column] == 0).all()
        )

        # The *_ingoing and *_outgoing columns share one base name and one
        # output file, so remember which base names were already handled.
        handled = set()
        for column in data.columns:
            if column in columns_bypass:
                continue
            protocol_name = "_".join(column.split("_", 2)[:2])
            if protocol_name in handled:
                continue
            handled.add(protocol_name)
            try:
                plot_kde_seaborn(data, filename_without_ext, protocol_name, out_dir)
            except (KeyError, ValueError) as e:
                # KeyError: the expected <base>_ingoing/<base>_outgoing pair
                # is not present; report it and carry on with the next column.
                print(f"Could not plot KDE for column {column}: {e}")
56+
57+
if __name__ == "__main__":
    # Script entry point. Expected command line:
    #   <script> <csv_dir> <output_dir> <is_malware>
    print("[" + __file__ + "]'s last modified: %s" % time.ctime(os.path.getmtime(__file__)))

    # Exactly three arguments are required; anything else is a usage error.
    if len(sys.argv) != 4:
        print("No input directory and output directory provided.")
        print(f"Usage: {sys.argv[0]} <csv_dir> <output_dir> <is_malware>")
        sys.exit(1)

    # NOTE: in_dir / out_dir are intentionally left as module-level names;
    # other code in this file may read them as globals.
    in_dir = sys.argv[1]
    # isdir (not exists): a regular file here would only fail later when the
    # directory is listed.
    if not os.path.isdir(in_dir):
        print(f"Directory: '{in_dir}' does not exist.")
        sys.exit(1)
    print(f"\nCSV Directory:\t\t{in_dir}")

    out_dir = sys.argv[2]
    os.makedirs(out_dir, exist_ok=True)
    print(f"SVG Files will save:\t{out_dir}")

    # The third argument is only echoed; it is not used any further here.
    IS_MALWARE = sys.argv[3]
    print(f"DATASET is malware:\t{IS_MALWARE}\n\n")
    main(in_dir, out_dir)

0 commit comments

Comments
 (0)