（Python版本）Relief算法有效地對特徵進行選擇

原創

2020-06-16 01:51

'''python2.7'''
import numpy as np
from random import randrange
from sklearn.preprocessing import normalize


def distanceNorm(Norm, D_value):
    """Return the norm of a difference vector.

    Args:
        Norm: Name of the norm as a string: '1' (sum of absolute values),
            '2' (Euclidean length) or 'Infinity' (largest absolute value).
        D_value: Array-like holding the element-wise difference between
            two samples.

    Returns:
        The requested norm of ``D_value`` as a NumPy scalar.

    Raises:
        Exception: If ``Norm`` is not one of the supported names.
    """
    if Norm == '1':
        return np.abs(D_value).sum()

    if Norm == '2':
        squared_total = np.power(D_value, 2).sum()
        return np.sqrt(squared_total)

    if Norm == 'Infinity':
        return np.abs(D_value).max()

    # Any other norm name is not implemented.
    raise Exception('We will program this later......')


def fit(features, labels, iter_ratio):
    """Estimate Relief feature weights for a labelled data set.

    For each of ``int(iter_ratio * n_samples)`` iterations a sample is drawn
    at random (with replacement). Its nearest neighbour of the same class
    (near hit) and nearest neighbour of a different class (near miss) are
    located using Euclidean distance, and every feature weight is decreased
    by the squared difference to the near hit and increased by the squared
    difference to the near miss.

    Args:
        features: Array-like of shape (n_samples, n_features).
        labels: Array-like with one class label per sample.
        iter_ratio: Number of iterations expressed as a fraction of
            ``n_samples``; expected to be positive.

    Returns:
        Array of length n_features: the accumulated weights divided by
        ``iter_ratio * n_samples``. The same array is also printed.

    Raises:
        ValueError: If ``labels`` does not have one entry per sample, or if a
            drawn sample has no neighbour of a different class.
    """
    features = np.asarray(features, dtype=float)
    labels = np.ravel(labels)
    (n_samples, n_features) = np.shape(features)
    if labels.shape[0] != n_samples:
        raise ValueError('labels must contain exactly one entry per sample')

    weight = np.zeros(n_features)

    for _ in range(0, int(iter_ratio * n_samples)):
        # Draw one sample at random.
        index_i = randrange(0, n_samples, 1)
        self_features = features[index_i]

        # Distances from the drawn sample to every sample. Computing them
        # here for every iter_ratio avoids relying on a precomputed matrix
        # that was previously left at zero for 0.5 <= iter_ratio < 1.0.
        distance = np.sqrt(np.sum(np.power(features - self_features, 2), axis=1))

        same_class = labels == labels[index_i]
        miss_indices = np.flatnonzero(~same_class)
        if miss_indices.size == 0:
            raise ValueError('Relief needs at least two classes: sample %d '
                             'has no neighbour of a different class' % index_i)

        # Exclude the sample itself explicitly so it can never tie with (and
        # be chosen ahead of) a genuine same-class neighbour.
        same_class[index_i] = False
        hit_indices = np.flatnonzero(same_class)

        # argmin returns the first minimum, so ties go to the lowest index.
        nearMiss = features[miss_indices[np.argmin(distance[miss_indices])]]
        if hit_indices.size:
            nearHit = features[hit_indices[np.argmin(distance[hit_indices])]]
        else:
            # The sample is alone in its class: it is its own near hit and
            # the hit term contributes nothing.
            nearHit = self_features

        # Update weights.
        weight = weight - np.power(self_features - nearHit, 2) + np.power(self_features - nearMiss, 2)

    result = weight / (iter_ratio * n_samples)
    print(result)
    return result



# Demo: six samples with three features each; the first three samples belong
# to class 1 and the last three to class 0. Each column is scaled to unit
# L2 norm before the weights are estimated.
X = normalize(X=np.array([[1, 2, 3], [1, 3, 3], [1, 5, 4], [1, 2, 8], [1, 1, 9], [1, 2, 10]]), norm='l2', axis=0)
Y = np.array([1, 1, 1, 0, 0, 0])

# fit() draws its samples at random, so a single run is noisy. Keep the
# weight vector of every run and average them per feature, instead of
# overwriting the result and reporting only the last run.
weight = np.mean([fit(X, Y, 1) for _ in range(0, 100)], axis=0)
print(weight[0], weight[1], weight[2])

發表評論

所有評論

還沒有人評論，想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.

（Python版本）Relief算法有效地對特徵進行選擇

使用c#強大的表達式樹實現對象的深克隆之解決循環引用的問題

GPT-4o 引領人機交互新風向，向量數據庫賽道沸騰了

free AI online tools All In One

痞子衡嵌入式：恩智浦i.MX RT1xxx系列MCU啓動那些事（12.A）- uSDHC eMMC啓動時間(RT1170)

基於Ubuntu-22.04安裝K8s-v1.28.2實驗（二）使用kube-vip實現集羣VIP訪問

企業大模型如何成爲自己數據的“百科全書”？

本地SSL證書過期輸入命令在IIS自動生成

.NET週刊【5月第2期 2024-05-12】

基於Ubuntu-22.04安裝K8s-v1.28.2實驗（一）部署K8s

基於Ubuntu-22.04安裝K8s-v1.28.2實驗（三）數據卷掛載NFS（網絡文件系統）

利用自編碼器對線性模型參數加密

簡易版蒙特卡洛採樣計算二項式分佈的參數

整數拆分(python)

針對神經網絡的快速符號攻擊

基於pytorch線性判決器（不使用nn.Module）

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結