使用數據集爲Kddcup99的10%數據集。
以下代碼需要先把下載下來的文件變成txt格式(文件名爲 kddcup.data_10_percent_corrected.txt),運行後結果會另存爲
kddcup.data_10_percent_corrected.csv 文件。接着運行即可。代碼中的文件路徑是相對於當前工作目錄的路徑,可以根據自己的文件位置更改(例如改成絕對路徑)。
代碼爲:
import pandas as pd
# Column names assigned to the header-less input file: 42 entries in total,
# the last one being "label".
col_names = [
    "duration",
    "protocol_type",
    "service",
    "flag",
    "src_bytes",
    "dst_bytes",
    "land",
    "wrong_fragment",
    "urgent",
    "hot",
    "num_failed_logins",
    "logged_in",
    "num_compromised",
    "root_shell",
    "su_attempted",
    "num_root",
    "num_file_creations",
    "num_shells",
    "num_access_files",
    "num_outbound_cmds",
    "is_host_login",
    "is_guest_login",
    "count",
    "srv_count",
    "serror_rate",
    "srv_serror_rate",
    "rerror_rate",
    "srv_rerror_rate",
    "same_srv_rate",
    "diff_srv_rate",
    "srv_diff_host_rate",
    "dst_host_count",
    "dst_host_srv_count",
    "dst_host_same_srv_rate",
    "dst_host_diff_srv_rate",
    "dst_host_same_src_port_rate",
    "dst_host_srv_diff_host_rate",
    "dst_host_serror_rate",
    "dst_host_srv_serror_rate",
    "dst_host_rerror_rate",
    "dst_host_srv_rerror_rate",
    "label",
]

# Input and output paths are relative to the current working directory.
source_path = "kddcup.data_10_percent_corrected.txt"
target_path = "kddcup.data_10_percent_corrected.csv"

# The input is comma-separated and has no header row, so the names above are
# supplied explicitly (comma is read_csv's default separator).
data = pd.read_csv(source_path, names=col_names, header=None)

# Show the first 10 rows as a quick sanity check.
preview = data.head(10)
print(preview)

# Save a copy as CSV. With to_csv's defaults the row index is written as the
# first (unnamed) column.
data.to_csv(target_path)