1
- import os
1
+ import os , sys
2
+ import os .path , time
2
3
import csv
3
- import time
4
- import sys
5
- from scapy .all import rdpcap
4
+ import pandas as pd
5
+
6
6
from collections import defaultdict
7
- from datetime import datetime
7
+ from datetime import datetime , timezone
8
+ from scapy .all import IP , TCP , UDP , rdpcap
9
+
10
def print_dic(ip_to_ip_data):
    """Print a small sample of the collected traffic dictionary for debugging.

    Shows at most 10 (src_ip, dst_ip) pairs; for each pair, the first 5
    timestamps (in sorted order) with their per-protocol size counters.
    """
    for i, ((src_ip, dst_ip), time_series_data) in enumerate(ip_to_ip_data.items()):
        if i >= 10:  # stop after showing 10 IP pairs (guard before printing)
            break
        print(f"Source IP: {src_ip}, Destination IP: {dst_ip}")
        for timestamp, sizes in sorted(time_series_data.items())[:5]:  # first 5 timestamps
            print(f"  {timestamp}: {sizes}")
19
def process_pcap(pcap_file, output_dir):
    """Read *pcap_file* and write one CSV per directed IP pair into *output_dir*.

    For every packet the whole L2 frame size (``len(packet)``) is accumulated
    per (src_ip, dst_ip) pair and per timestamp, bucketed by protocol
    (TCP / UDP / Other).  Each packet is booked twice: as ``..._ingoing``
    under (src, dst) and as ``..._outgoing`` under the reversed pair
    (dst, src) — so a CSV is produced for both directions of every flow.

    A per-pair grand total is kept in parallel and decremented while the CSVs
    are written; if any counter does not end at exactly zero, an accounting
    bug is reported and the function returns early.
    """
    packets = rdpcap(pcap_file)

    # (src_ip, dst_ip) -> timestamp string -> per-protocol byte counters.
    # The L2_Total_* keys are initialized but never incremented here; they
    # are still emitted (as 0) so the CSV schema stays stable.
    ip_to_ip_data = defaultdict(lambda: defaultdict(lambda: {
        'L2_TCP_ingoing': 0, 'L2_TCP_outgoing': 0,
        'L2_UDP_ingoing': 0, 'L2_UDP_outgoing': 0,
        'L2_Other_ingoing': 0, 'L2_Other_outgoing': 0,
        'L2_Total_ingoing': 0, 'L2_Total_outgoing': 0}))

    # (src_ip, dst_ip) -> grand totals, used only for the consistency check.
    total_ip_to_ip_data = defaultdict(lambda: {
        'L2_Total_TCP_ingoing': 0, 'L2_Total_TCP_outgoing': 0,
        'L2_Total_UDP_ingoing': 0, 'L2_Total_UDP_outgoing': 0,
        'L2_Total_Other_ingoing': 0, 'L2_Total_Other_outgoing': 0})

    for packet in packets:
        # NOTE(review): naive local-time timestamp; confirm UTC is not
        # required (a commented-out fromtimestamp(..., timezone.utc) variant
        # existed in an earlier revision).
        pkt_time = datetime.fromtimestamp(float(packet.time)).strftime('%Y-%m-%d %H:%M:%S.%f')
        pkt_size = len(packet)  # full L2 frame size in bytes

        if packet.haslayer(TCP):
            proto = 'TCP'
            # NOTE(review): assumes TCP rides on IPv4; a TCP-over-IPv6 packet
            # would raise here because only the IP (v4) layer is looked up.
            src_ip, dst_ip = packet[IP].src, packet[IP].dst
        elif packet.haslayer(UDP):
            proto = 'UDP'
            src_ip, dst_ip = packet[IP].src, packet[IP].dst
        else:
            proto = 'Other'
            if IP in packet:
                src_ip, dst_ip = packet[IP].src, packet[IP].dst
            else:
                # Non-IP frames (e.g. ARP) are lumped under a sentinel pair.
                src_ip = dst_ip = "not.ip.packet"

        # Book the packet on both directed pairs: forward as ingoing,
        # reverse as outgoing.  Accessing the reverse key also creates it,
        # so both directions get a CSV below.
        ip_to_ip_data[(src_ip, dst_ip)][pkt_time][f'L2_{proto}_ingoing'] += pkt_size
        ip_to_ip_data[(dst_ip, src_ip)][pkt_time][f'L2_{proto}_outgoing'] += pkt_size

        total_ip_to_ip_data[(src_ip, dst_ip)][f'L2_Total_{proto}_ingoing'] += pkt_size
        total_ip_to_ip_data[(dst_ip, src_ip)][f'L2_Total_{proto}_outgoing'] += pkt_size

    fieldnames = [
        'timestamp', 'source_ip', 'destination_ip',
        'L2_TCP_ingoing', 'L2_TCP_outgoing',
        'L2_UDP_ingoing', 'L2_UDP_outgoing',
        'L2_Other_ingoing', 'L2_Other_outgoing',
        'L2_Total_ingoing', 'L2_Total_outgoing',
    ]
    for (src_ip, dst_ip), time_series_data in ip_to_ip_data.items():
        csv_filename = f"{output_dir}/{src_ip}_to_{dst_ip}.csv"
        with open(csv_filename, mode='w', newline='') as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
            writer.writeheader()

            totals = total_ip_to_ip_data[(src_ip, dst_ip)]
            # Rows are ordered by the timestamp string, which sorts
            # chronologically thanks to the %Y-%m-%d %H:%M:%S.%f format.
            for timestamp, sizes in sorted(time_series_data.items()):
                # Subtract what is written from the running totals; every
                # counter must reach exactly zero once all CSVs are done.
                for proto in ('TCP', 'UDP', 'Other'):
                    totals[f'L2_Total_{proto}_ingoing'] -= sizes[f'L2_{proto}_ingoing']
                    totals[f'L2_Total_{proto}_outgoing'] -= sizes[f'L2_{proto}_outgoing']

                row = {'timestamp': timestamp, 'source_ip': src_ip, 'destination_ip': dst_ip}
                row.update(sizes)
                writer.writerow(row)

    # Consistency check: everything accumulated must have been written out.
    for sub_dict in total_ip_to_ip_data.values():
        for sub_key, remainder in sub_dict.items():
            if remainder != 0:
                print("Error.....", sub_key, remainder)
                return
    print(f"Processed and exported data to {output_dir}")
129
def main(in_dir, out_dir):
    """Process every ``.pcap`` file found in *in_dir*, writing CSVs to *out_dir*.

    Non-pcap files are skipped silently; subdirectories are not recursed into.
    """
    for filename in os.listdir(in_dir):
        if filename.endswith(".pcap"):
            print(f"PCAP File:\t{filename}")
            process_pcap(os.path.join(in_dir, filename), out_dir)
137
def run(in_dir, out_dir, IS_MALWARE):
    """Validate directories, report settings, then process all pcaps in *in_dir*.

    Parameters:
        in_dir: directory containing the ``.pcap`` files; must already exist.
        out_dir: directory for the generated CSVs; created if missing.
        IS_MALWARE: dataset label — only echoed to the operator, it does not
            change processing.
    """
    if not os.path.exists(in_dir):
        print(f"Directory: '{in_dir}' does not exist.")
        sys.exit()  # was bare exit(); sys.exit is the supported API in scripts
    print(f"\n\nPCAP Directory:\t\t{in_dir}")
    # exist_ok=True makes a separate existence pre-check redundant.
    os.makedirs(out_dir, exist_ok=True)
    print(f"CSV Files will save:\t{out_dir}")
    print(f"DATASET is malware:\t{IS_MALWARE}\n\n")
    main(in_dir, out_dir)
148
+ if __name__ == "__main__" :
149
+ print ("[" + __file__ + "]'s last modified: %s" % time .ctime (os .path .getmtime (__file__ )))
150
+ # Check if a parameter is provided
151
+ if len (sys .argv ) == 4 :
152
+ in_dir = sys .argv [1 ]
153
+ if not os .path .exists (in_dir ):
154
+ print (f"Directory: '{ in_dir } ' does not exist." )
155
+ exit ()
156
+ print (f"\n \n PCAP Directory:\t \t { in_dir } " )
157
+
158
+ out_dir = sys .argv [2 ]
159
+ if not os .path .exists (out_dir ):
160
+ os .makedirs (out_dir , exist_ok = True )
161
+ print (f"CSV Files will save:\t { out_dir } " )
162
+
163
+ IS_MALWARE = sys .argv [3 ]
164
+ print (f"DATASET is malware:\t { IS_MALWARE } \n \n " )
165
+ main (in_dir , out_dir )
166
+ else :
167
+ print ("No input directory and output directory provided." )
0 commit comments