from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold
from active_learning.sampler import hierarchical_clustering_AL
from sklearn.metrics import accuracy_score
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import precision_score
def select_batch(sampler, N, already_selected,
                 **kwargs):
    """Ask ``sampler`` for the next batch of points to label.

    Args:
        sampler: object exposing a ``select_batch(**kwargs)`` method.
        N: batch size, forwarded to the sampler as the ``N`` keyword.
        already_selected: indices chosen so far, forwarded to the sampler as
            the ``already_selected`` keyword.
        **kwargs: any further keyword arguments, forwarded unchanged.

    Returns:
        Whatever ``sampler.select_batch`` returns.
    """
    # Caller-supplied keywords come first, then the two explicit arguments,
    # so the sampler sees the keywords in the same order as before.
    call_args = {**kwargs, "N": N, "already_selected": already_selected}
    return sampler.select_batch(**call_args)
def _budget_cut_after(budgetCut, label_obtain, classNum):
    """Return the updated "budget cut" after a new label was appended.

    The budget cut is the number of labels that had been obtained the first
    time every class was represented; 0 means that has not happened yet.
    """
    if budgetCut == 0 and len(np.unique(label_obtain)) == classNum:
        return len(label_obtain)
    return budgetCut


def HSAL(X, y, budget):
    """Evaluate hierarchical-clustering active learning with 10-fold CV.

    For every fold a small random seed set is drawn from the training split
    (extended one point at a time until it contains at least two classes),
    then ``HierarchicalClusterAL`` is queried one point at a time until
    ``budget`` labels have been obtained in total. A multinomial logistic
    regression is fitted on the labelled points and scored on the test split.

    Args:
        X: feature array, indexed by row with the fold index arrays.
        y: label array, indexed the same way as ``X``.
        budget: total number of labels allowed per fold (seed set included).

    Returns:
        Four lists with one entry per fold: accuracy, Cohen's kappa,
        weighted-average precision, and the budget cut (number of labels
        obtained when all classes were first seen; 0 if that never happened).

    Note:
        The fold shuffle and the seed draws use unseeded randomness
        (``KFold(shuffle=True)`` without ``random_state`` and the global
        ``np.random`` state), so results differ between runs.
    """
    AccList = []
    KappaList = []
    WAP_list = []
    BudgetCut_list = []
    classNum = len(np.unique(y))
    # Size of the first random draw. This is a per-fold constant: the actual
    # seed-set size is tracked separately so that growth in one fold cannot
    # leak into the next one.
    initialNum = 1
    SEED = 100
    model = LogisticRegression(random_state=SEED, multi_class='multinomial', solver="lbfgs", max_iter=200)
    KF = KFold(n_splits=10, shuffle=True)
    for train_idx, test_idx in KF.split(X):
        budgetCut = 0
        label_obtain = []
        X_train = X[train_idx]
        y_train = y[train_idx]
        X_test = X[test_idx]
        y_test = y[test_idx]
        # Pool of training-set positions that have not been labelled yet.
        indices = list(range(len(train_idx)))

        # Initial draw (np.random.choice returns an ndarray). Every drawn
        # point is removed from the pool and has its label recorded.
        initialPoints = list(np.random.choice(indices, initialNum, replace=False))
        for point in initialPoints:
            indices.remove(point)
            label_obtain.append(y_train[point])
            budgetCut = _budget_cut_after(budgetCut, label_obtain, classNum)

        # Keep drawing single points until the seed set holds two classes.
        # Drawn points leave the pool so none is picked twice, and the loop
        # stops if the pool runs dry (single-class training split).
        while indices and len(np.unique(label_obtain)) == 1:
            point = np.random.choice(indices, 1, replace=False)[0]
            indices.remove(point)
            initialPoints.append(point)
            label_obtain.append(y_train[point])
            budgetCut = _budget_cut_after(budgetCut, label_obtain, classNum)

        # selected_inds starts out as the initial (seed) samples.
        selected_inds = list(initialPoints)
        sampler = hierarchical_clustering_AL.HierarchicalClusterAL(X_train, y_train, seed=SEED)
        # Remaining budget after paying for this fold's seed set; a negative
        # value simply results in no queries.
        queryNum = budget - len(selected_inds)
        for _ in range(queryNum):
            batch = 1
            selected_batch_inputs = {"labeled": dict(zip(selected_inds, y_train[selected_inds])), "y": y_train}
            new_selected = select_batch(sampler, batch, selected_inds, **selected_batch_inputs)
            label_obtain.append(y_train[new_selected[0]])
            budgetCut = _budget_cut_after(budgetCut, label_obtain, classNum)
            selected_inds.extend(new_selected)

        model.fit(X_train[selected_inds], y_train[selected_inds])
        y_pred = model.predict(X_test)
        acc = accuracy_score(y_test, y_pred)
        kappa = cohen_kappa_score(y_test, y_pred)
        wap = precision_score(y_test, y_pred, average='weighted')
        print("精度===",acc)
        AccList.append(acc)
        KappaList.append(kappa)
        WAP_list.append(wap)
        BudgetCut_list.append(budgetCut)
    return AccList, KappaList, WAP_list, BudgetCut_list
if __name__ == '__main__':
    # Run the experiment on the iris data set with 30 labels per fold.
    X, y = datasets.load_iris(return_X_y=True)
    budget = 30
    AccList, KappaList, WAP_list, BudgetCut_list = HSAL(X, y, budget)

    # (mean label, std label, per-fold values) for each reported metric.
    summary_rows = (
        ("平均精度=", "精度標準差=", AccList),
        ("平均kappa=", "kappa標準差=", KappaList),
        ("平均WAP=", "WAP標準差=", WAP_list),
        ("平均截斷預算", "截斷預算標準差", BudgetCut_list),
    )
    for mean_label, std_label, fold_values in summary_rows:
        print(mean_label, np.mean(fold_values), std_label, np.std(fold_values))