tsfresh 是一个开源的 Python 包,用于从时间序列数据中提取特征,能够提取出超过 4000 种特征。
1. 查看数据
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from tsfresh.example.robot_execution_failures import download_robot_execution_failures, load_robot_execution_failures
from tsfresh import extract_features, extract_relevant_features, select_features
from tsfresh.utilities.dataframe_functions import impute
from tsfresh.feature_extraction import ComprehensiveFCParameters
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
# Fetch the robot execution failures example data, then load the readings
# (df) and the target (y) that is later used for selection and training.
download_robot_execution_failures()
df, y = load_robot_execution_failures()
df.head()

# Columns drawn for each example: the time axis plus the F_* / T_* signals.
signal_columns = ['time', 'F_x', 'F_y', 'F_z', 'T_x', 'T_y', 'T_z']

# Plot the series with id 2 and the series with id 25 for visual comparison.
df.loc[df.id == 2, signal_columns].plot(
    x='time', title='Success example (id 2)', figsize=(12, 6))
df.loc[df.id == 25, signal_columns].plot(
    x='time', title='Failure example (id 25)', figsize=(12, 6))
2. 时间序列特征提取
# Feature-calculator settings handed to tsfresh for the extraction below.
extraction_settings = ComprehensiveFCParameters()

# Extract features grouped by `id`, with samples ordered by `time`.
# `impute` is passed as the impute function for the resulting matrix.
# The callable imported from tsfresh is `extract_features`.
X = extract_features(
    df,
    column_id='id',
    column_sort='time',
    default_fc_parameters=extraction_settings,
    impute_function=impute,
)
print(X.shape)
X.head()
X.info()
- 特征过滤
# Extract features from the raw series and keep only those tsfresh deems
# relevant to the target `y`, using the same settings and id/time columns.
X_filtered = extract_relevant_features(
    df,
    y,
    column_id='id',
    column_sort='time',
    default_fc_parameters=extraction_settings,
)

# Inspect the reduced feature matrix.
print(X_filtered.shape)
X_filtered.head()
X_filtered.info()
3. 用决策树训练,预测和评估模型
# Split the full matrix, the filtered matrix and the labels in one call so
# all three share the same train/test partition (30% held out for testing).
X_train, X_test, X_filtered_train, X_filtered_test, y_train, y_test = train_test_split(
    X, X_filtered, y, test_size=0.3)

# Baseline: a decision tree trained on every extracted feature.
dtc = DecisionTreeClassifier()
dtc.fit(X_train, y_train)
print(classification_report(y_test, dtc.predict(X_test)))

# Number of features seen during fit. `n_features_` was removed in
# scikit-learn 1.2; `n_features_in_` (available since 0.24) replaces it.
dtc.n_features_in_
4524
# Second model: a decision tree trained only on the filtered features,
# evaluated against the same held-out labels.
dtc2 = DecisionTreeClassifier()
dtc2.fit(X_filtered_train, y_train)
print(classification_report(y_test, dtc2.predict(X_filtered_test)))

# Number of features seen during fit. `n_features_` was removed in
# scikit-learn 1.2; `n_features_in_` (available since 0.24) replaces it.
dtc2.n_features_in_
621