通過解析pcap文件,按照{src ip, src port, transport protocol, dst ip, dst port} 拆分流,並提取出前向流(Forward)與後向流(Backward),代碼如下:
import pyshark
import pandas as pd
class Net_flow(object):
    """Extract per-packet flow records from a pcap file using pyshark.

    Every record describes one packet: source and destination endpoints as
    ``"ip:port"`` strings, the transport protocol name, the capture timestamp
    and the packet length.
    """

    def __init__(self, file_path):
        """Open ``file_path`` as a ``pyshark.FileCapture``."""
        self.cap = pyshark.FileCapture(file_path)

    def close(self):
        """Close the underlying capture."""
        self.cap.close()

    # Record layout: [src "ip:port", dst "ip:port", transport, timestamp, length]
    def get_target_client_ip_port(self, num=None):
        """Yield one record per IPv4 packet that carries a transport layer.

        Ports are read from the packet's own transport layer (TCP, UDP, ...)
        instead of always from ``pkt.tcp``, so non-TCP traffic no longer
        raises ``AttributeError``. Packets without an ``ip`` layer or without
        a transport layer (e.g. ARP, ICMP) are skipped because they have no
        endpoint pair to report.

        Args:
            num: Maximum number of records to yield. ``None`` (or ``0``)
                means no limit.

        Yields:
            ``[src, dst, transport, timestamp, length]`` as a list of strings.
        """
        emitted = 0
        for pkt in self.cap:
            ip_layer = getattr(pkt, "ip", None)
            transport_name = getattr(pkt, "transport_layer", None)
            if ip_layer is None or not transport_name:
                continue
            # The layer attribute is the lower-cased protocol name
            # (pkt.tcp, pkt.udp, ...).
            transport = getattr(pkt, transport_name.lower(), None)
            if transport is None:
                continue
            # The IP protocol number is available as pkt.ip.proto if needed
            # (icmp 1, igmp 2, tcp 6, udp 17).
            yield [
                f"{ip_layer.src}:{transport.srcport}",
                f"{ip_layer.dst}:{transport.dstport}",
                str(transport_name),
                str(pkt.sniff_timestamp),
                str(pkt.length),
            ]
            emitted += 1
            # Stop after exactly `num` records (the old `index > num` check
            # let one extra record through).
            if num and emitted >= num:
                return
if __name__ == '__main__':
    # Script entry point: dump every packet record of a pcap file to a CSV.
    import csv  # local import: only this script section needs it

    try:
        pcap_file = "pacp文件地址"
        net_flow = Net_flow(pcap_file)
        try:
            target_client_ip_port = net_flow.get_target_client_ip_port()
            # Append the extracted records to the CSV. newline='' lets the
            # csv module emit its own "\r\n" terminator without the text
            # layer turning it into "\r\r\n" on Windows.
            with open("保存的文件.csv", 'a', newline='') as f:
                csv.writer(f).writerows(target_client_ip_port)
        finally:
            # Always release the capture, even if extraction fails.
            net_flow.cap.close()
    except Exception as e:
        # Top-level boundary of the script: report the failure and exit.
        print(e)
上面的代碼提取出了五元組,將它們保存起來或是直接放在內存中,然後就可以根據這些五元組提取網絡流了,這裏包括前向流(forward)與後向流(backward):
import pyshark
import pandas as pd
def get_features(file_name):
    """Yield one DataFrame per bidirectional flow found in a record CSV.

    The CSV is read without a header row and its five columns are named
    ``addr_ip``, ``dst_ip``, ``protocol``, ``timestamp`` and ``length``.
    Sources are visited in ``value_counts`` order (most rows first); for each
    source, its destinations are visited the same way. Rows going
    ``addr_ip -> dst_ip`` are tagged ``state == 'forward'`` and the mirrored
    rows ``dst_ip -> addr_ip`` are tagged ``state == 'backward'``.

    Each endpoint pair is yielded only once: when the mirrored pair is
    reached later in the iteration it is skipped, so a conversation is not
    reported a second time with the roles swapped.

    NOTE(review): the ``protocol`` column is not part of the grouping key, so
    rows of different protocols between the same two endpoints share a flow.

    Args:
        file_name: Path of the CSV file to read.

    Yields:
        A DataFrame holding the forward rows followed by the backward rows of
        one flow, with the extra ``state`` column.
    """
    try:
        dataframe = pd.read_csv(file_name, header=None)
    except pd.errors.EmptyDataError:
        # A file with no rows simply contains no flows.
        return
    dataframe.columns = ['addr_ip', 'dst_ip', 'protocol', 'timestamp', 'length']

    finished_flows = set()  # unordered endpoint pairs already yielded
    # Split the big frame into small ones, one source endpoint at a time.
    for addr_ip in dataframe['addr_ip'].value_counts().index:
        addr_df = dataframe[dataframe['addr_ip'] == addr_ip]
        for dst_ip in addr_df['dst_ip'].value_counts().index:
            flow_key = frozenset((addr_ip, dst_ip))
            if flow_key in finished_flows:
                continue
            finished_flows.add(flow_key)

            # addr_ip -> dst_ip is defined as the forward direction.
            # assign() returns a new frame, so the source data is never
            # modified through a slice.
            forward_df = addr_df[addr_df['dst_ip'] == dst_ip].assign(state='forward')
            backward_mask = (dataframe['addr_ip'] == dst_ip) & (dataframe['dst_ip'] == addr_ip)
            backward_df = dataframe[backward_mask].assign(state='backward')
            yield pd.concat([forward_df, backward_df])
def analyze_flow(dataframe):
    """Split one flow into its forward and backward rows.

    ``dataframe`` must have a ``state`` column; rows are selected by the
    values ``'forward'`` and ``'backward'``. The per-direction processing is
    still a placeholder, so nothing is computed and ``None`` is returned.
    """
    state = dataframe['state']
    forward_df_all = dataframe[state == 'forward']
    backward_df_all = dataframe[state == 'backward']
    # Placeholder: operate on the forward and backward flows here.
    return None
if __name__ == '__main__':
    # Script entry point: split the saved records into flows and analyze
    # each flow in turn.
    try:
        # Plain ASCII quotes: the typographic quotes previously used here
        # are not valid Python string delimiters.
        flow_df = get_features("五元組.csv")
        for df in flow_df:
            analyze_flow(df)
    except Exception as e:
        # Top-level boundary of the script: report the failure and exit.
        print(e)