from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold
from active_learning.sampler import hierarchical_clustering_AL
from sklearn.metrics import accuracy_score
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import precision_score
def select_batch(sampler, N, already_selected,
                 **kwargs):
    """Ask ``sampler`` for the next batch of points to label.

    Args:
        sampler: Object exposing a ``select_batch(**kwargs)`` method.
        N: Batch size; forwarded to the sampler as the ``N`` keyword.
        already_selected: Indices chosen so far; forwarded as the
            ``already_selected`` keyword.
        **kwargs: Extra keyword arguments passed through to the sampler.

    Returns:
        Whatever ``sampler.select_batch`` returns.
    """
    # The sampler takes everything by keyword, so fold the two positional
    # arguments into the keyword dict before forwarding.
    kwargs["N"] = N
    kwargs["already_selected"] = already_selected
    return sampler.select_batch(**kwargs)
def HSAL(X,y,budget):
    """Evaluate hierarchical-sampling active learning with 10-fold CV.

    For each fold, two initial training points carrying different labels are
    drawn at random. The hierarchical-clustering sampler is then queried one
    point at a time until ``budget`` training points are selected. A
    logistic-regression model is fitted on the selected points and scored on
    the held-out fold.

    Args:
        X: Feature array; rows are selected with integer index arrays.
        y: Label array aligned with the rows of ``X``.
        budget: Number of labelled training points per fold, including the
            two initial ones.

    Returns:
        list: One test accuracy per fold.

    Raises:
        ValueError: If a training fold holds fewer than two distinct labels,
            in which case no valid initial pair exists.
    """
    initialNum = 2
    SEED = 100
    batch = 1  # points requested from the sampler per query
    # A single seeded generator makes the fold split and the initial draws
    # reproducible, matching the fixed seeds of the model and the sampler.
    rng = np.random.RandomState(SEED)
    model = LogisticRegression(random_state=SEED,multi_class='multinomial',solver="lbfgs", max_iter=200)
    KF = KFold(n_splits=10,shuffle=True,random_state=SEED)
    AccList = []
    for train_idx,test_idx in KF.split(X):
        X_train, y_train = X[train_idx], y[train_idx]
        X_test, y_test = X[test_idx], y[test_idx]
        m = len(train_idx)

        # Without at least two classes the redraw loop below could never end.
        if np.unique(y_train).size < 2:
            raise ValueError(
                "training fold contains fewer than two distinct labels; "
                "cannot draw initial points from different classes")

        # Redraw until the initial points (an ndarray) cover two classes.
        initialPoints = rng.choice(m, initialNum, replace=False)
        while np.unique(y_train[initialPoints]).size == 1:
            initialPoints = rng.choice(m, initialNum, replace=False)
        selected_inds = list(initialPoints)
        print("初始樣本的標記===",y_train[selected_inds])

        sampler = hierarchical_clustering_AL.HierarchicalClusterAL(X_train,y_train,seed=SEED)
        for _ in range(budget - initialNum):
            # The sampler is handed the labels revealed so far plus the full
            # label vector.
            selected_batch_inputs = {"labeled": dict(zip(selected_inds, y_train[selected_inds])), "y": y_train}
            new_selected = select_batch(sampler,batch,selected_inds,**selected_batch_inputs)
            selected_inds.extend(new_selected)

        # NOTE(review): the original indentation was lost; training and
        # evaluating once per fold, after the budget is spent, is assumed.
        model.fit(X_train[selected_inds],y_train[selected_inds])
        y_pred = model.predict(X_test)
        acc = accuracy_score(y_test,y_pred)
        print("精度===",acc)
        AccList.append(acc)
    return AccList
if __name__ == '__main__':
    # Demo run: score HSAL on the iris data set.
    X,y = datasets.load_iris(return_X_y=True)
    budget = 50  # labelling budget handed to HSAL
    AccList = HSAL(X, y, budget)
    print(AccList)
# [1] Dasgupta S, Hsu D. Hierarchical sampling for active learning[C]//Proceedings of the 25th International Conference on Machine Learning. ACM, 2008: 208-215.
# This script is a thin wrapper built on top of intelligent-annotation.
# If you are interested in the code, leave a comment.