Skip to content

Commit 14da891

Browse files
Update convert_from_pcap_to_l3_level_protocol_based_incoming_outgoing_bytes.py
+deleted unneeded imports
1 parent e5b1499 commit 14da891

File tree

1 file changed

+165
-5
lines changed

1 file changed

+165
-5
lines changed
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,167 @@
1-
import os
1+
import os, sys
2+
import os.path, time
23
import csv
3-
import time
4-
import sys
5-
from scapy.all import rdpcap
4+
import pandas as pd
5+
66
from collections import defaultdict
7-
from datetime import datetime
7+
from datetime import datetime, timezone
8+
from scapy.all import IP, TCP, UDP, rdpcap
9+
10+
def print_dic(ip_to_ip_data, max_pairs=10, max_timestamps=5):
    """Print a short preview of the per-pair time series.

    Args:
        ip_to_ip_data: Mapping of ``(src_ip, dst_ip)`` tuples to a mapping of
            timestamp -> counters, as built by ``process_pcap``.
        max_pairs: Maximum number of IP pairs to print (in the mapping's
            iteration order).
        max_timestamps: Maximum number of timestamps printed per pair, taken
            from the start of the sorted timestamps.
    """
    # Imported locally so this function does not depend on a file-level import.
    from itertools import islice

    # islice caps the preview at exactly ``max_pairs`` entries; the previous
    # ``if i >= 10: break`` ran after printing and therefore showed 11 pairs.
    for (src_ip, dst_ip), time_series_data in islice(ip_to_ip_data.items(), max_pairs):
        print(f"Source IP: {src_ip}, Destination IP: {dst_ip}")
        for timestamp, sizes in sorted(time_series_data.items())[:max_timestamps]:
            print(f" {timestamp}: {sizes}")
18+
19+
def process_pcap(pcap_file, output_dir):
    """Split a capture into per-IP-pair CSV time series of byte counts.

    Every packet is classified as TCP, UDP or Other and its size is added to
    two buckets keyed by the packet timestamp:

    * ``(src, dst)`` gets the size under ``L2_<proto>_ingoing``;
    * ``(dst, src)`` gets the size under ``L2_<proto>_outgoing``.

    One file ``<output_dir>/<src>_to_<dst>.csv`` is written per pair, with one
    row per distinct timestamp, sorted by timestamp.

    Args:
        pcap_file: Path of the capture file, read in full with ``rdpcap``.
        output_dir: Existing directory that receives the CSV files.

    Returns:
        None. Prints a confirmation on success; prints an error line and
        returns early if the per-row counters do not add up to the running
        totals accumulated while reading the packets.
    """
    protocols = ('TCP', 'UDP', 'Other')
    directions = ('ingoing', 'outgoing')
    fieldnames = [
        'timestamp', 'source_ip', 'destination_ip',
        'L2_TCP_ingoing', 'L2_TCP_outgoing',
        'L2_UDP_ingoing', 'L2_UDP_outgoing',
        'L2_Other_ingoing', 'L2_Other_outgoing',
        'L2_Total_ingoing', 'L2_Total_outgoing',
    ]

    packets = rdpcap(pcap_file)

    # (src, dst) -> timestamp string -> byte counters for that instant.
    ip_to_ip_data = defaultdict(lambda: defaultdict(lambda: {
        'L2_TCP_ingoing': 0, 'L2_TCP_outgoing': 0,
        'L2_UDP_ingoing': 0, 'L2_UDP_outgoing': 0,
        'L2_Other_ingoing': 0, 'L2_Other_outgoing': 0,
        'L2_Total_ingoing': 0, 'L2_Total_outgoing': 0,
    }))

    # (src, dst) -> running totals, used only for the consistency check below.
    total_ip_to_ip_data = defaultdict(lambda: {
        'L2_Total_TCP_ingoing': 0, 'L2_Total_TCP_outgoing': 0,
        'L2_Total_UDP_ingoing': 0, 'L2_Total_UDP_outgoing': 0,
        'L2_Total_Other_ingoing': 0, 'L2_Total_Other_outgoing': 0,
    })

    for packet in packets:
        # Naive local-time string with microseconds; packets sharing the same
        # microsecond timestamp are merged into a single row.
        pkt_time = datetime.fromtimestamp(float(packet.time)).strftime('%Y-%m-%d %H:%M:%S.%f')
        pkt_size = len(packet)  # Packet size in bytes

        if packet.haslayer(TCP):
            proto = 'TCP'
        elif packet.haslayer(UDP):
            proto = 'UDP'
        else:
            proto = 'Other'

        # Resolve the endpoints independently of the transport protocol: a
        # TCP/UDP packet without an IPv4 layer (e.g. carried over IPv6) used
        # to raise on ``packet[IP]``. Such packets keep their protocol class
        # but are filed under the placeholder address.
        if IP in packet:
            src_ip = packet[IP].src
            dst_ip = packet[IP].dst
        else:
            src_ip = "not.ip.packet"
            dst_ip = "not.ip.packet"

        forward = ip_to_ip_data[(src_ip, dst_ip)][pkt_time]
        backward = ip_to_ip_data[(dst_ip, src_ip)][pkt_time]

        forward[f'L2_{proto}_ingoing'] += pkt_size
        backward[f'L2_{proto}_outgoing'] += pkt_size
        # The Total columns were declared and exported but never updated, so
        # they were always 0 in the CSV output.
        forward['L2_Total_ingoing'] += pkt_size
        backward['L2_Total_outgoing'] += pkt_size

        total_ip_to_ip_data[(src_ip, dst_ip)][f'L2_Total_{proto}_ingoing'] += pkt_size
        total_ip_to_ip_data[(dst_ip, src_ip)][f'L2_Total_{proto}_outgoing'] += pkt_size

    for (src_ip, dst_ip), time_series_data in ip_to_ip_data.items():
        csv_filename = os.path.join(output_dir, f"{src_ip}_to_{dst_ip}.csv")
        pair_totals = total_ip_to_ip_data[(src_ip, dst_ip)]

        with open(csv_filename, mode='w', newline='') as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
            writer.writeheader()

            for timestamp, sizes in sorted(time_series_data.items()):
                # Subtract each exported value from the running totals; they
                # must all reach zero once every row has been written.
                for proto in protocols:
                    for direction in directions:
                        pair_totals[f'L2_Total_{proto}_{direction}'] -= sizes[f'L2_{proto}_{direction}']

                row = {'timestamp': timestamp, 'source_ip': src_ip, 'destination_ip': dst_ip}
                row.update(sizes)
                writer.writerow(row)

    # Check everything is calculated correctly: any non-zero remainder means
    # the exported rows do not account for all the bytes that were counted.
    for sub_dict in total_ip_to_ip_data.values():
        for sub_key, remaining in sub_dict.items():
            if remaining != 0:
                print("Error.....", sub_key, remaining)
                return
    print(f"Processed and exported data to {output_dir}")
128+
129+
def main(in_dir, out_dir):
    """Convert every ``.pcap`` file located directly inside *in_dir*.

    Args:
        in_dir: Directory to scan (not recursive). Only names ending in
            ``.pcap`` are processed.
        out_dir: Directory handed to ``process_pcap`` for the CSV output.
    """
    # Sorted so captures are processed in a deterministic order;
    # os.listdir() returns entries in arbitrary order.
    for filename in sorted(os.listdir(in_dir)):
        if not filename.endswith(".pcap"):
            continue
        print(f"PCAP File:\t{filename}")
        process_pcap(os.path.join(in_dir, filename), out_dir)
136+
137+
def run(in_dir, out_dir, IS_MALWARE):
    """Validate the directories, report the settings and convert the captures.

    Args:
        in_dir: Directory containing the ``.pcap`` files.
        out_dir: Directory for the generated CSV files; created if missing.
        IS_MALWARE: Dataset label. It is only echoed to stdout here and does
            not influence the conversion.

    Raises:
        SystemExit: With status 1 when *in_dir* is not an existing directory.
    """
    # isdir rather than exists: a regular file at this path would pass an
    # existence check and then fail later inside os.listdir().
    if not os.path.isdir(in_dir):
        print(f"Directory: '{in_dir}' does not exist.")
        # sys.exit instead of the site-provided exit() helper, with a
        # non-zero status so callers can detect the failure.
        sys.exit(1)
    print(f"\n\nPCAP Directory:\t\t{in_dir}")

    # exist_ok=True already covers the "directory is present" case.
    os.makedirs(out_dir, exist_ok=True)
    print(f"CSV Files will save:\t{out_dir}")
    print(f"DATASET is malware:\t{IS_MALWARE}\n\n")
    main(in_dir, out_dir)
147+
148+
if __name__ == "__main__":
    print("[" + __file__ + "]'s last modified: %s" % time.ctime(os.path.getmtime(__file__)))
    # Exactly three arguments are expected: input dir, output dir, malware flag.
    if len(sys.argv) == 4:
        # run() performs the same validation, directory creation and banner
        # output that used to be duplicated here, then starts the conversion.
        run(sys.argv[1], sys.argv[2], sys.argv[3])
    else:
        # The old message only covered the "no arguments" case; state the
        # expected invocation and signal the failure to the caller.
        print(f"Usage: {sys.argv[0]} <pcap_dir> <output_dir> <is_malware>")
        sys.exit(1)

0 commit comments

Comments
 (0)