(Python 版本)Relief 算法有效地對特徵進行選擇

'''python2.7'''
import numpy as np
from random import randrange
from sklearn.preprocessing import normalize


def distanceNorm(Norm, D_value):
    """Return the length of a difference vector under the chosen norm.

    Args:
        Norm: Name of the norm, given as a string: '1' (sum of absolute
            values), '2' (square root of the sum of squares) or
            'Infinity' (largest absolute value).
        D_value: Array-like holding the component-wise differences.

    Returns:
        The norm of ``D_value`` as computed by NumPy.

    Raises:
        ValueError: If ``Norm`` is not one of the supported names.
    """
    if Norm == '1':
        return np.sum(np.absolute(D_value))
    if Norm == '2':
        return np.sqrt(np.sum(np.power(D_value, 2)))
    if Norm == 'Infinity':
        return np.max(np.absolute(D_value))
    # ValueError is a subclass of Exception, so callers that caught the old
    # generic Exception keep working; the message now names the bad value.
    raise ValueError(
        "Unsupported norm %r; expected '1', '2' or 'Infinity'" % (Norm,))


def fit(features, labels, iter_ratio):
    """Estimate Relief feature weights from randomly drawn samples.

    On each iteration one sample is drawn at random. Its nearest neighbour
    with the same label (near hit) and its nearest neighbour with a
    different label (near miss) are located by Euclidean distance, and each
    feature weight is decreased by the squared difference to the hit and
    increased by the squared difference to the miss.

    Args:
        features: 2-D array-like of shape (n_samples, n_features).
        labels: 1-D array-like with one class label per sample.
        iter_ratio: Number of iterations expressed as a fraction of
            n_samples; ``int(iter_ratio * n_samples)`` samples are drawn.

    Returns:
        1-D array of length n_features with the accumulated weights divided
        by the number of iterations actually run (all zeros when no
        iteration runs). The same array is also printed to stdout.

    Raises:
        ValueError: If a drawn sample has no neighbour with a different
            label, i.e. ``labels`` holds a single class.
    """
    features = np.asarray(features, dtype=float)
    labels = np.asarray(labels)
    (n_samples, n_features) = np.shape(features)
    n_iterations = int(iter_ratio * n_samples)
    weight = np.zeros(n_features)

    for _ in range(n_iterations):
        # draw one sample at random
        index_i = randrange(0, n_samples, 1)
        self_features = features[index_i]

        # Distances are computed per drawn sample for every iter_ratio, so
        # no code path can search an unfilled distance table.
        distance = np.sqrt(
            np.sum(np.power(features - self_features, 2), axis=1))
        # Mask the self-distance with inf so the sample can only be picked
        # as its own near hit when it is the sole member of its class.
        distance[index_i] = np.inf

        same_label = labels == labels[index_i]
        hit_candidates = np.flatnonzero(same_label)
        miss_candidates = np.flatnonzero(~same_label)
        if miss_candidates.size == 0:
            raise ValueError(
                "Relief needs samples from at least two classes")

        # argmin returns the first minimum, so ties go to the lowest index.
        nearHit = features[
            hit_candidates[np.argmin(distance[hit_candidates])]]
        nearMiss = features[
            miss_candidates[np.argmin(distance[miss_candidates])]]

        # update weight
        weight = (weight
                  - np.power(self_features - nearHit, 2)
                  + np.power(self_features - nearMiss, 2))

    # Average over the iterations that really ran; iter_ratio * n_samples
    # is not an integer in general and may be zero.
    if n_iterations > 0:
        weight = weight / n_iterations
    print(weight)
    return weight



# Demo: six samples with three features; the first feature is constant and
# the class label follows the third feature.
X = normalize(X=np.array([[1, 2, 3], [1, 3, 3], [1, 5, 4], [1, 2, 8], [1, 1, 9], [1, 2, 10]]), norm='l2', axis=0)
Y = [1, 1, 1, 0, 0, 0]
Y = np.array(Y)
# fit() draws samples at random, so keep the weight vector of every run and
# average across the 100 runs instead of reporting only the last one.
runs = []
for i in range(0, 100):
    runs.append(fit(X, Y, 1))
weight = np.average(runs, axis=0)
print(weight[0], weight[1], weight[2])







發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼?請在上方評論欄輸入並且點擊發布。
相關文章