# ex7: K-Means and PCA

import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from scipy.io import loadmat
import random

# Load the 'ex7data2' MATLAB file and take its 'X' entry as the sample matrix.
data = loadmat('ex7data2')
X = data['X']

def initialize(X,K=3):
    """Draw K rows of X at random to use as initial centroids.

    Parameters
    ----------
    X : numpy.ndarray
        2-D array with one sample per row.
    K : int, optional
        Number of rows to draw (default 3).

    Returns
    -------
    numpy.ndarray
        Array built from the K sampled rows (sampled without replacement).

    Raises
    ------
    ValueError
        Propagated from ``random.sample`` when K exceeds the number of rows.
    """
    # A descriptive local name avoids shadowing the built-in ``list``.
    rows = X.tolist()
    return np.array(random.sample(rows, K))


def findClosestCentroids(X, centroids):
    """Assign every sample to its nearest centroid.

    Parameters
    ----------
    X : numpy.ndarray
        2-D array with one sample per row.
    centroids : numpy.ndarray
        2-D array with one centroid per row and the same number of columns
        as X.

    Returns
    -------
    numpy.ndarray
        Float array of shape (number of samples, 1); entry ``[i, 0]`` is the
        row index of the centroid with the smallest squared Euclidean
        distance to ``X[i]``. Ties go to the lowest centroid index.
    """
    # Work in floating point so unsigned integer inputs cannot wrap around
    # when the difference is negative.
    samples = np.asarray(X, dtype=float)
    centres = np.asarray(centroids, dtype=float)

    # Broadcasting yields a (samples, centroids, features) difference array.
    diff = samples[:, np.newaxis, :] - centres[np.newaxis, :, :]
    d = np.sum(diff ** 2, axis=2)

    # argmin returns exactly one index per row, even when several centroids
    # are equally close.
    return np.argmin(d, axis=1).astype(float).reshape(-1, 1)

def computeMeans(X, idx, K):
    """Recompute each centroid as the mean of the samples assigned to it.

    Parameters
    ----------
    X : numpy.ndarray
        2-D array with one sample per row.
    idx : numpy.ndarray
        Cluster index of every sample; it is flattened before use, so both
        a 1-D array and a single-column array are accepted.
    K : int
        Number of clusters.

    Returns
    -------
    numpy.ndarray
        Array of shape (K, number of columns of X). A cluster with no
        assigned samples is re-seeded with a randomly chosen row of X so
        that no centroid becomes NaN.
    """
    m, n = X.shape
    labels = np.ravel(idx)
    centroids = np.zeros((K, n))
    for i in range(K):
        members = X[labels == i, :]
        if members.shape[0] > 0:
            centroids[i, :] = np.mean(members, axis=0)
        elif m > 0:
            # The mean of an empty selection is NaN; restart this cluster
            # from a random sample instead.
            centroids[i, :] = X[random.randrange(m), :]
        else:
            # No samples at all: there is nothing to average or re-seed from.
            centroids[i, :] = np.nan
    return centroids
def kMeansInitCentroids(X,K,iterations=1):
    """Run K-means from a random start and return the resulting centroids.

    Parameters
    ----------
    X : numpy.ndarray
        2-D array with one sample per row.
    K : int
        Number of clusters.
    iterations : int, optional
        Number of assign/update rounds. The default of 1 keeps the
        single-step behaviour; pass 10 to get the ten-round variant that
        used to be kept here as a disabled copy of the function.

    Returns
    -------
    numpy.ndarray
        Centroids after the requested number of rounds.
    """
    # Honour the caller's K; previously the start was always drawn with K=3.
    centroids = initialize(X, K=K)
    for _ in range(iterations):
        idx = findClosestCentroids(X, centroids)
        centroids = computeMeans(X, idx, K)
    return centroids

def plot_data(X):
    """Scatter the samples and overlay the centroids from ten separate
    calls to kMeansInitCentroids.

    Parameters
    ----------
    X : numpy.ndarray
        2-D array; columns 0 and 1 are used as the plot coordinates.
    """
    n_runs = 10
    plt.figure()
    xs, ys = X[:, 0], X[:, 1]
    plt.scatter(xs, ys, c='blue', marker='o')
    for _ in range(n_runs):
        # Every call starts from a fresh random initialisation.
        centres = kMeansInitCentroids(X, K=3)
        plt.plot(centres[:, 0], centres[:, 1], '->', linewidth=5)
    plt.show()
# Run the K-means plotting demo on the loaded samples.
plot_data(X)

# 2. Image compression with K-means

import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from scipy.io import loadmat
import random
import cv2 as cv
from matplotlib import colors
from scipy import io as spio
import scipy.misc

img = cv.imread(r'bird_small.png')
# cv.imread reports an unreadable or missing file by returning None
# instead of raising, so fail here with a clear message.
if img is None:
    raise FileNotFoundError("bird_small.png could not be read")
img_data = img/255.0

# Reshape the image into an M x 3 array (M is the number of pixels).
# Note that the clustering input is built from the raw values in img,
# not from the scaled img_data.
X = img.reshape((-1,3))


def initialize(X,K=16):
    """Draw K rows of X at random to use as initial centroids.

    Parameters
    ----------
    X : numpy.ndarray
        2-D array with one sample per row.
    K : int, optional
        Number of rows to draw (default 16).

    Returns
    -------
    numpy.ndarray
        Array built from the K sampled rows (sampled without replacement).
        It is rebuilt from Python lists, so it does not keep a narrow
        integer dtype of X.

    Raises
    ------
    ValueError
        Propagated from ``random.sample`` when K exceeds the number of rows.
    """
    # A descriptive local name avoids shadowing the built-in ``list``.
    rows = X.tolist()
    return np.array(random.sample(rows, K))


def findClosestCentroids(X, centroids):
    """Assign every sample to its nearest centroid.

    Parameters
    ----------
    X : numpy.ndarray
        2-D array with one sample per row.
    centroids : numpy.ndarray
        2-D array with one centroid per row and the same number of columns
        as X.

    Returns
    -------
    numpy.ndarray
        1-D float array with one entry per sample: the row index of the
        centroid with the smallest squared Euclidean distance. Ties go to
        the lowest centroid index.
    """
    # Work in floating point so unsigned integer inputs cannot wrap around
    # when the difference is negative.
    samples = np.asarray(X, dtype=float)
    centres = np.asarray(centroids, dtype=float)

    # One broadcasted subtraction replaces the per-sample, per-centroid
    # Python loop; the result has shape (samples, centroids, features).
    diff = samples[:, np.newaxis, :] - centres[np.newaxis, :, :]
    d = np.sum(diff ** 2, axis=2)

    # Column index of the minimum in each row.
    return np.argmin(d, axis=1).astype(float)

def computeMeans(X, idx, K):
    """Recompute each centroid as the mean of the samples assigned to it.

    Parameters
    ----------
    X : numpy.ndarray
        2-D array with one sample per row.
    idx : numpy.ndarray
        Cluster index of every sample; it is flattened before use.
    K : int
        Number of clusters.

    Returns
    -------
    numpy.ndarray
        Array of shape (K, number of columns of X). A cluster with no
        assigned samples is re-seeded with a randomly chosen row of X so
        that no centroid becomes NaN.
    """
    n_samples, n_features = X.shape
    labels = np.ravel(idx)
    centroids = np.zeros((K, n_features))
    for i in range(K):
        members = X[labels == i, :]
        if members.shape[0] > 0:
            centroids[i, :] = np.mean(members, axis=0)
        elif n_samples > 0:
            # The mean of an empty selection is NaN; restart this cluster
            # from a random sample instead.
            centroids[i, :] = X[random.randrange(n_samples), :]
        else:
            # No samples at all: there is nothing to average or re-seed from.
            centroids[i, :] = np.nan
    return centroids

def kMeansInitCentroids(X,K,iterations=10):
    """Run K-means from a random start and return the resulting centroids.

    Parameters
    ----------
    X : numpy.ndarray
        2-D array with one sample per row.
    K : int
        Number of clusters.
    iterations : int, optional
        Number of assign/update rounds (default 10).

    Returns
    -------
    numpy.ndarray
        Centroids after the requested number of rounds.
    """
    # Honour the caller's K; previously the start was always drawn with K=16.
    centroids = initialize(X, K=K)
    for inter in range(iterations):
        print(u'迭代計算次數:%d' % (inter + 1))
        # The assignment and update steps belong inside the loop; before,
        # the loop only printed and a single update ran after it.
        idx = findClosestCentroids(X, centroids)
        centroids = computeMeans(X, idx, K)
    return centroids


def plot_data(X):
    """Show the source image next to its 16-colour K-means reconstruction.

    Parameters
    ----------
    X : numpy.ndarray
        Pixel array with one pixel per row, as built from the module-level
        ``img``.
    """
    plt.figure()

    # Left panel: the source image, converted from BGR to RGB for display.
    plt.subplot(1, 2, 1)
    plt.imshow(cv.cvtColor(img, cv.COLOR_BGR2RGB))
    plt.title('original')
    plt.xticks([])
    plt.yticks([])

    centroids = kMeansInitCentroids(X, K=16)
    print('K-Means運行結束\n')
    print('壓縮圖片...\n')

    # Replace every pixel with the centroid of the cluster it belongs to.
    assignments = findClosestCentroids(X, centroids)
    compress_img = np.zeros((X.shape[0], centroids.shape[1]))
    for k in range(centroids.shape[0]):
        pixel_rows = np.flatnonzero(assignments == k)
        compress_img[pixel_rows, :] = centroids[k, :]

    # Restore the image layout; the original author notes that OpenCV needs
    # a uint8 array here.
    compress_img = compress_img.reshape(img_data.shape).astype("uint8")

    # Right panel: the reconstruction.
    plt.subplot(1, 2, 2)
    plt.imshow(cv.cvtColor(compress_img, cv.COLOR_BGR2RGB))
    plt.title('Compress')
    plt.xticks([])
    plt.yticks([])
    plt.show()

# Run the image-compression demo on the pixel array.
plot_data(X)

# 3. Image PCA

import numpy as np
from scipy.io import loadmat
import matplotlib.pyplot as plt

# Load ex7data1.mat and take its 'X' entry as the data matrix.
data = loadmat('ex7data1.mat')
X = data['X']

def feature_normalize(X):
    """Standardise every column of X in place.

    Each column has its mean subtracted and is divided by its standard
    deviation. The result is written back into X, so the caller's array is
    modified; other functions in this script rely on that.

    Parameters
    ----------
    X : numpy.ndarray
        2-D array with one sample per row.

    Returns
    -------
    numpy.ndarray
        The same array object that was passed in.
    """
    mu = np.mean(X, axis=0)
    sigma = np.std(X, axis=0)
    # A constant column has zero spread; dividing by 1 leaves it at 0
    # instead of turning it into NaN through 0/0.
    sigma = np.where(sigma == 0, 1.0, sigma)
    X[:] = (X - mu) / sigma
    return X


def PCA(X):
    """Normalise X and decompose the resulting (X^T X) / m matrix by SVD.

    X is passed through feature_normalize first, which writes the
    normalised values back into X.

    Parameters
    ----------
    X : numpy.ndarray
        2-D array with one sample per row.

    Returns
    -------
    tuple
        The three outputs of ``np.linalg.svd`` for that matrix, in order.
    """
    normalised = feature_normalize(X)
    n_samples = normalised.shape[0]
    covariance = normalised.T.dot(normalised) / n_samples
    left, singular_values, right = np.linalg.svd(covariance)
    return left, singular_values, right


def project_data(X,K):
    """Project X onto the first K columns of the U matrix returned by PCA.

    Parameters
    ----------
    X : numpy.ndarray
        2-D array with one sample per row.
    K : int
        Number of leading columns of U to keep.

    Returns
    -------
    numpy.ndarray
        The product of X with those K columns.
    """
    # PCA normalises X in place, so the X multiplied below already holds
    # the normalised values.
    basis = PCA(X)[0][:, :K]
    return X.dot(basis)

def recover_data(U,Z,K):
    """Map projected data back to the original feature space.

    Parameters
    ----------
    U : numpy.ndarray
        Matrix whose first K columns were used for the projection.
    Z : numpy.ndarray
        Projected data with K columns.
    K : int
        Number of leading columns of U to use.

    Returns
    -------
    numpy.ndarray
        ``Z`` multiplied by the transpose of the first K columns of ``U``.
    """
    # Use the U supplied by the caller; recomputing PCA on the module-level
    # X ignored this argument and tied the result to a global.
    U_reduce = U[:, 0:K]
    return Z.dot(U_reduce.T)

def plot_data(X):
    """Plot the samples, their 1-D PCA reconstruction, and the lines that
    join each sample to its reconstruction, in three side-by-side panels.

    Parameters
    ----------
    X : numpy.ndarray
        2-D array; columns 0 and 1 are used as the plot coordinates.
    """
    plt.figure()

    # Panel 1: the samples as passed in, drawn before PCA is run on X.
    plt.subplot(1, 3, 1)
    plt.scatter(X[:, 0], X[:, 1], c='blue', marker='o')
    plt.title('original')

    U, S, V = PCA(X)
    Z = project_data(X, K=1)
    X_rec = recover_data(U, Z, K=1)

    # Panel 2: the recovered points.
    plt.subplot(1, 3, 2)
    plt.scatter(X_rec[:, 0], X_rec[:, 1], c='red', marker='+')
    plt.title('from 1D to 2D')
    plt.xticks([-2, 2])
    plt.yticks([-2, 2])

    # Panel 3: visualise the mapping between samples and recovered points.
    plt.subplot(1, 3, 3)
    plt.scatter(X[:, 0], X[:, 1], facecolors='none', c='blue', marker='o', s=20)
    plt.scatter(X_rec[:, 0], X_rec[:, 1], facecolors='none', c='red', marker='+', s=20)
    for row in range(X.shape[0]):
        x_pair = (X[row, 0], X_rec[row, 0])
        y_pair = (X[row, 1], X_rec[row, 1])
        plt.plot(x_pair, y_pair, 'k--')
    plt.title('draw line')

    plt.show()

# Run the PCA plotting demo on the loaded data.
plot_data(X)

# 4. Faces

from scipy.io import loadmat
import numpy as np
import matplotlib.pyplot as plt

# Load ex7faces.mat and take its 'X' entry as the data matrix.
data = loadmat('ex7faces.mat')
X = data['X']

def visual(data):
    """Display the rows of ``data`` as a grid of grayscale image tiles.

    Each row is reshaped into a tile, scaled by its own largest absolute
    value, and placed in a padded grid that is shown with matplotlib.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array with one flattened image per row. The row length must
        equal tile height times tile width as computed below.
    """
    m, n = data.shape
    # np.int was removed in NumPy 1.24; the built-in int is the replacement.
    example_width = int(np.round(np.sqrt(n)))
    example_height = int(n / example_width)
    display_rows = int(np.floor(np.sqrt(m)))
    display_cols = int(np.ceil(m / display_rows))
    pad = 1
    # Background of -1 so the padding between tiles shows as the darkest value.
    display_array = -np.ones((pad + display_rows * (example_height + pad),
                              pad + display_cols * (example_width + pad)))

    # display_rows * display_cols >= m, so every row index maps to a grid
    # cell and no example index can run past the end of data.
    for curr_ex in range(m):
        j, i = divmod(curr_ex, display_cols)
        # order='F' reproduces the transposed placement of the original
        # index pattern for square tiles and also works for non-square ones.
        # NOTE(review): presumably the .mat images are stored column-major.
        tile = data[curr_ex].reshape((example_height, example_width), order='F')
        # Scale by this row of ``data`` itself (not the module-level X).
        max_val = np.max(np.abs(tile))
        if max_val == 0:
            # An all-zero row stays zero instead of dividing by zero.
            max_val = 1.0
        top = pad + j * (example_height + pad)
        left = pad + i * (example_width + pad)
        display_array[top:top + example_height, left:left + example_width] = tile / max_val

    plt.figure()
    plt.imshow(display_array, cmap='gray', extent=[-1, 1, -1, 1])
    plt.axis('off')
    plt.show()

def feature_normalize(X):
    """Standardise every column of X in place.

    Each column has its mean subtracted and is divided by its standard
    deviation. The result is written back into X, so the caller's array is
    modified; other functions in this script rely on that.

    Parameters
    ----------
    X : numpy.ndarray
        2-D array with one sample per row.

    Returns
    -------
    numpy.ndarray
        The same array object that was passed in.
    """
    column_mean = np.mean(X, axis=0)
    column_std = np.std(X, axis=0)
    # A constant column has zero spread; dividing by 1 leaves it at 0
    # instead of turning it into NaN through 0/0.
    column_std = np.where(column_std == 0, 1.0, column_std)
    X[:] = (X - column_mean) / column_std
    return X


def PCA(X):
    """Normalise X and decompose the resulting (X^T X) / m matrix by SVD.

    X is passed through feature_normalize first, which writes the
    normalised values back into X.

    Parameters
    ----------
    X : numpy.ndarray
        2-D array with one sample per row.

    Returns
    -------
    tuple
        The three outputs of ``np.linalg.svd`` for that matrix, in order.
    """
    scaled = feature_normalize(X)
    row_count = scaled.shape[0]
    sigma_matrix = scaled.T.dot(scaled) / row_count
    u_mat, s_vals, v_mat = np.linalg.svd(sigma_matrix)
    return u_mat, s_vals, v_mat


def project_data(X,K):
    """Project X onto the first K columns of the U matrix returned by PCA.

    Parameters
    ----------
    X : numpy.ndarray
        2-D array with one sample per row.
    K : int
        Number of leading columns of U to keep.

    Returns
    -------
    numpy.ndarray
        The product of X with those K columns.
    """
    # PCA normalises X in place, so the X multiplied below already holds
    # the normalised values.
    components = PCA(X)[0]
    leading = components[:, :K]
    return X.dot(leading)

def recover_data(U,Z,K):
    """Map projected data back to the original feature space.

    Parameters
    ----------
    U : numpy.ndarray
        Matrix whose first K columns were used for the projection.
    Z : numpy.ndarray
        Projected data with K columns.
    K : int
        Number of leading columns of U to use.

    Returns
    -------
    numpy.ndarray
        ``Z`` multiplied by the transpose of the first K columns of ``U``.
    """
    # Use the U supplied by the caller; recomputing PCA on the module-level
    # X ignored this argument and repeated the whole decomposition.
    U_reduce = U[:, 0:K]
    return Z.dot(U_reduce.T)

# Show the first 100 rows of X as image tiles, before PCA is run on X.
visual(X[0:100,:])
# Images after dimensionality reduction
U, S, V = PCA(X)
Z = project_data(X,K=100)
visual(Z[0:100,:])
# Recover the images
X_rec = recover_data(U, Z, K=100)
visual(X_rec[0:100,:])

 

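# Post a comment
# All comments
# No one has commented yet. Want to be the first? Enter your comment in the box above and click publish.
# Related articles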