from numpy import *
import operator

分類

def classify0(inX,dataSet,labels,k):
    """Classify one sample with the k-nearest-neighbours majority vote.

    Parameters:
        inX: feature vector of the sample to classify.
        dataSet: 2-D numpy array of training samples, one sample per row.
        labels: sequence of class labels; labels[i] belongs to dataSet[i].
        k: number of nearest neighbours that take part in the vote.

    Returns:
        The label that occurs most often among the k rows of dataSet that
        are closest to inX (Euclidean distance). When several labels share
        the highest count, the one that comes first in the dict's iteration
        order wins; on Python 3.7+ that is the label whose first vote came
        from the nearer neighbour.

    Raises:
        ValueError: if k is smaller than 1 or larger than the number of
            training samples.
    """
    dataSetSize = dataSet.shape[0]  # number of training samples (rows)
    if not 1 <= k <= dataSetSize:
        raise ValueError("k must be between 1 and %d, got %r" % (dataSetSize, k))

    # Broadcasting subtracts inX from every row, so no explicit tile() copy
    # is needed; the sign of the difference vanishes when it is squared.
    diffMat = dataSet - inX
    sqDistance = (diffMat ** 2).sum(axis=1)  # axis=1 sums across each row
    distances = sqDistance ** 0.5
    sortedDistIndicies = distances.argsort()  # row indices, nearest first

    classCount = {}  # label -> number of votes among the k nearest rows
    for neighbourIndex in sortedDistIndicies[:k]:
        voteIlabel = labels[neighbourIndex]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

    # dict.items() is available on both Python 2 and Python 3, whereas
    # dict.iteritems() exists only on Python 2.
    # operator.itemgetter(1) picks the vote count out of each (label, count)
    # pair, and reverse=True puts the most frequent label first.
    sortedClassCount = sorted(classCount.items(), key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]

數據預處理

def file2matrix(filename):
    """Load a tab-separated sample file into a feature matrix and a label list.

    Every non-blank line is split on tab characters. The first three fields
    are stored as floats in one row of the matrix and the last field is
    parsed as an integer class label.

    Parameters:
        filename: path of the text file to read.

    Returns:
        A tuple (returnMat, classLabelVector): returnMat is a float array of
        shape (number of non-blank lines, 3) and classLabelVector is a list
        of int labels in the same order as the rows.

    Raises:
        ValueError: if a feature field is not a number or the last field is
            not an integer.
    """
    # The with-block closes the file even when parsing fails later on.
    with open(filename) as fhand:
        strippedLines = [line.strip() for line in fhand]
    # Blank lines (for example a trailing empty line) carry no sample and
    # would otherwise make int('') fail.
    sampleLines = [line for line in strippedLines if line]

    returnMat = zeros((len(sampleLines),3))
    classLabelVector = []
    for index, line in enumerate(sampleLines):
        listFromLine = line.split("\t")
        returnMat[index,:] = [float(field) for field in listFromLine[0:3]]
        # The fields are read as text, so the label is converted to int.
        classLabelVector.append(int(listFromLine[-1]))
    return returnMat,classLabelVector

# Interactive-session snippet: re-import the module after editing it, then
# load the dating data set.
# NOTE(review): `knn` is never imported in the visible source and `reload` is
# a builtin only on Python 2 - presumably `knn` is the module that holds the
# functions above; confirm before running.
reload(knn)
datingDataMat,datingLabels = knn.file2matrix("datingTestSet2.txt")

import matplotlib 
import matplotlib.pyplot as plt

fig = plt.figure()  # create a new Figure (canvas) object
ax = fig.add_subplot(111)  # split the canvas into a 1x1 grid and create the Axes in its first cell
# NOTE(review): xDataSet and yDataSet are not defined in the visible source -
# presumably two columns of datingDataMat; confirm.
ax.scatter(xDataSet,yDataSet)
# Signature, for reference:
#matplotlib.pyplot.scatter(x, y, s=None, c=None, marker=None, cmap=None, norm=None, vmin=None, vmax=None, alpha=None, linewidths=None, verts=None, edgecolors=None, hold=None, data=None, **kwargs)
plt.show()

數據歸一化

newValue = (oldValue - minValue) / (maxValue - minValue)

def autoNorm(dataSet):
    """Rescale every column of dataSet with min-max normalisation.

    Each value becomes (value - column minimum) / (column maximum - column
    minimum), so a column with distinct values is mapped onto [0, 1].

    Parameters:
        dataSet: 2-D numpy array, one sample per row and one feature per
            column. It is not modified.

    Returns:
        A tuple (normDataSet, ranges, minVals): the normalised array, the
        per-column range (maximum - minimum) and the per-column minimum.
        ranges is returned unmodified, so it is 0 for a constant column.
    """
    minVals = dataSet.min(0)  # axis 0: minimum of every column
    maxVals = dataSet.max(0)  # axis 0: maximum of every column
    ranges = maxVals - minVals
    # A constant column has a range of 0; dividing by it would fill the
    # column with NaN. Its numerator is 0 everywhere, so dividing by 1
    # instead maps the whole column to 0.
    safeRanges = where(ranges == 0, 1, ranges)
    # Broadcasting applies the per-column vectors to every row, and "/" on
    # arrays divides element by element.
    normDataSet = (dataSet - minVals) / safeRanges
    return normDataSet,ranges,minVals

# Interactive-session snippet: re-import the module after editing it, then
# normalise the feature matrix.
# NOTE(review): `knn` is never imported in the visible source and `reload` is
# a builtin only on Python 2 - presumably `knn` is the module that holds the
# functions above; confirm before running.
reload(knn)
normMat,ranges,minVals = knn.autoNorm(datingDataMat)

測試

def datingClassTest():
    """Estimate the classifier's error rate on a hold-out part of the data.

    Loads 'datingTestSet2.txt', normalises the features and classifies the
    first 10% of the rows against the remaining 90% with k = 3. Prints every
    prediction next to the true label, then the overall error rate.

    Raises:
        ZeroDivisionError: if the file holds fewer than 10 samples, because
            the hold-out set is then empty.
    """
    hoRatio = 0.10  # fraction of the samples held out for testing
    datingDataMat,datingLabels = file2matrix('datingTestSet2.txt')
    normMat,ranges,minVals = autoNorm(datingDataMat)
    m = normMat.shape[0]  # total number of samples
    numTestVecs = int(m*hoRatio)

    # The first numTestVecs rows are the test set and the rest is the
    # training set. datingLabels is a plain list, so it needs a one-
    # dimensional slice; the 2-D index [numTestVecs,:] raises TypeError.
    trainingMat = normMat[numTestVecs:m,:]
    trainingLabels = datingLabels[numTestVecs:m]

    errorCount = 0.0
    for i in range(numTestVecs):
        classifierResult = classify0(normMat[i,:],trainingMat,trainingLabels,3)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("the classifier came back with:%d,the real answer is %d" % (classifierResult,datingLabels[i]))
        if classifierResult != datingLabels[i]:
            errorCount += 1.0
    print("the total error rate is %f" % (errorCount/float(numTestVecs)))

預測函數

def classifyPerson():
    """Ask for one person's three features and print the predicted category.

    Reads three numbers from standard input, normalises them with the minimum
    and range of the training data in 'datingTestSet2.txt', classifies the
    result with k = 3 and prints the matching entry of the result list.

    Raises:
        ValueError: if an answer cannot be converted to float.
    """
    # raw_input exists only on Python 2; Python 3 renamed it to input.
    try:
        readAnswer = raw_input
    except NameError:
        readAnswer = input

    # Labels are used as 1-based indices into this list.
    resultList = ["not at all","in small doses","in large doses"]
    percentTats = float(readAnswer("percentage of time spent playing video games?"))
    ffMiles = float(readAnswer("frequent flier miles earned per year"))
    iceCream = float(readAnswer("liters of ice cream consumed per year"))

    datingDataMat,datingLabels = file2matrix("datingTestSet2.txt")
    normMat,ranges,minVals = autoNorm(datingDataMat)
    # NOTE(review): the order miles, game time, ice cream presumably matches
    # the column order of the training file - confirm against the data.
    inArr = array([ffMiles,percentTats,iceCream])
    # The input is scaled exactly like the training rows before classifying.
    classifierResult = classify0((inArr-minVals)/ranges,normMat,datingLabels,3)
    # Formatting into one string keeps the output of the original
    # two-argument print statement on both Python 2 and 3.
    print("%s %s" % ("you will probably like this person:",resultList[classifierResult-1]))

ML in action代碼學習/CH02 KNN/約會網站配對效果改進

分類

數據預處理

數據歸一化

測試

預測函數

C語言--右移左移

12款高效開源Wiki系統推薦，打造團隊知識管理利器

一個開源且全面的C#算法實戰教程

dotnet 基於 DirectML 控制檯運行 Phi-3 模型

自定義MyBatis插件

一款.NET開源、功能強大、跨平臺的繪圖庫 - OxyPlot

常用的 Git 指令

sm4加密工具類

Coursera ML筆記 -----week3 Logistic Regression

Coursera ML筆記 -----week4 Neural Network -1

Coursera ML筆記 -----week5 Neural Network，Back Propagation

概率論2---參數估計

Coursera ML筆記 -----week9-1 異常檢測

Mac下配置sublime實現LaTeX

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結