import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from scipy.io import loadmat
import random
# Load the exercise dataset and use its 'X' entry as the sample matrix.
# Columns 0 and 1 of X are what plot_data scatters below.
data = loadmat('ex7data2')
X = data['X']
def initialize(X, K=3):
    """Pick K distinct rows of X at random to serve as starting centroids.

    Args:
        X: 2-D array-like with one sample per row.
        K: Number of centroids to draw (default 3).

    Returns:
        ``np.ndarray`` with K rows, each one a row of X, drawn without
        replacement by ``random.sample``.

    Raises:
        ValueError: Raised by ``random.sample`` when K is larger than the
            number of rows in X or negative.
    """
    # Named ``rows`` rather than ``list`` so the builtin is not shadowed.
    rows = np.asarray(X).tolist()
    return np.array(random.sample(rows, K))
def findClosestCentroids(X, centroids):
    """Assign every sample to the centroid with the smallest squared distance.

    Args:
        X: Array of shape (m, n), one sample per row.
        centroids: Array of shape (K, n), one centroid per row.

    Returns:
        Float array of shape (m, 1); entry i is the index of the centroid
        closest to ``X[i]``. When several centroids are equally close the
        lowest index is used.
    """
    # Work in float so integer inputs cannot overflow or wrap when subtracted.
    X = np.asarray(X, dtype=float)
    centroids = np.asarray(centroids, dtype=float)

    # Broadcasting (m, 1, n) - (1, K, n) gives every sample/centroid pair.
    diff = X[:, np.newaxis, :] - centroids[np.newaxis, :, :]
    d = np.sum(diff ** 2, axis=2)

    # argmin yields exactly one index per row even on ties, whereas comparing
    # against the row minimum can match several columns at once.
    return np.argmin(d, axis=1).astype(float).reshape(-1, 1)
def computeMeans(X, idx, K):
    """Recompute each centroid as the mean of the samples assigned to it.

    Args:
        X: Array of shape (m, n), one sample per row.
        idx: Cluster index of every sample; any shape that flattens to
            length m (for example (m, 1) or (m,)).
        K: Number of clusters.

    Returns:
        Array of shape (K, n). Row k is the mean of the samples labelled k.
        A cluster with no samples is re-seeded with a randomly chosen row of
        X; if X has no rows at all, the centroid stays at zero.
    """
    X = np.asarray(X)
    labels = np.ravel(idx)
    centroids = np.zeros((K, X.shape[1]))
    for k in range(K):
        members = X[labels == k]
        if members.shape[0] > 0:
            centroids[k] = members.mean(axis=0)
        elif X.shape[0] > 0:
            # Averaging zero rows would give NaN, which then poisons every
            # later distance computation; restart the cluster on a data point.
            centroids[k] = X[np.random.randint(X.shape[0])]
    return centroids
"""
def kMeansInitCentroids(X,K):
iterations = 10
centroids = initialize(X, K=3)
for inter in range(iterations):
idx = findClosestCentroids(X, centroids)
centroids = computeMeans(X, idx, K)
return centroids
"""
def kMeansInitCentroids(X, K):
    """Draw K random starting centroids and refine them with one K-means step.

    Args:
        X: Sample matrix, one sample per row.
        K: Number of clusters.

    Returns:
        The centroids produced by ``computeMeans`` after a single
        assignment pass over the randomly drawn seeds.
    """
    # Seed with the caller's K. The seed count was previously pinned to 3,
    # which disagreed with the K given to computeMeans for any other value.
    centroids = initialize(X, K)
    idx = findClosestCentroids(X, centroids)
    return computeMeans(X, idx, K)
def plot_data(X, K=3, iterations=10):
    """Scatter the samples and overlay the centroids of successive K-means steps.

    Args:
        X: Sample matrix; columns 0 and 1 are used as plot coordinates.
        K: Number of clusters (default 3).
        iterations: Number of assignment/update steps to run and plot
            (default 10).
    """
    plt.figure()
    plt.scatter(X[:, 0], X[:, 1], c='blue', marker='o')

    # Seed once and keep refining the same centroids. Calling
    # kMeansInitCentroids inside the loop would draw fresh random seeds on
    # every pass, so the overlays would be unrelated one-step runs instead of
    # the progress of a single clustering.
    centroids = initialize(X, K)
    for _ in range(iterations):
        idx = findClosestCentroids(X, centroids)
        centroids = computeMeans(X, idx, K)
        plt.plot(centroids[:, 0], centroids[:, 1], '->', linewidth=5)
    plt.show()
# Run the K-means demo on the samples loaded above.
plot_data(X)
# 二。K-means壓縮圖片 (Part 2: K-means image compression)
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from scipy.io import loadmat
import random
import cv2 as cv
from matplotlib import colors
from scipy import io as spio
import scipy.misc
# Read the image with OpenCV; plot_data converts it with COLOR_BGR2RGB before display.
img = cv.imread(r'bird_small.png')
# Copy of the image divided by 255.0; in the code visible here only its shape is used (by plot_data).
img_data = img/255.0
# Reshape the image data into an M x 3 array (M is the number of pixels in the image).
# Note that this uses the raw `img`, not the scaled `img_data`.
X = img.reshape((-1,3))
def initialize(X, K=16):
    """Pick K distinct rows of X at random to serve as starting centroids.

    Args:
        X: 2-D array-like with one sample (pixel) per row.
        K: Number of centroids to draw (default 16).

    Returns:
        ``np.ndarray`` with K rows, each one a row of X, drawn without
        replacement by ``random.sample``. The rows pass through ``tolist``
        first, so the result holds plain Python numbers, not X's dtype.

    Raises:
        ValueError: Raised by ``random.sample`` when K is larger than the
            number of rows in X or negative.
    """
    # Named ``rows`` rather than ``list`` so the builtin is not shadowed.
    rows = np.asarray(X).tolist()
    return np.array(random.sample(rows, K))
def findClosestCentroids(X, centroids):
    """Assign every sample to the centroid with the smallest squared distance.

    Args:
        X: Array of shape (m, n), one sample per row.
        centroids: Array of shape (K, n), one centroid per row.

    Returns:
        Float array of shape (m,); entry i is the column index of the
        smallest distance for ``X[i]``. Ties resolve to the lowest index.
    """
    # Work in float: subtracting unsigned-integer pixel values directly
    # would wrap around instead of going negative.
    X = np.asarray(X, dtype=float)
    centroids = np.asarray(centroids, dtype=float)

    # Broadcasting (m, 1, n) - (1, K, n) covers every sample/centroid pair
    # in one vectorised step instead of m * K separate norm calls.
    diff = X[:, np.newaxis, :] - centroids[np.newaxis, :, :]
    d = np.sum(diff ** 2, axis=2)

    # Column index of the minimum in each row.
    return np.argmin(d, axis=1).astype(float)
def computeMeans(X, idx, K):
    """Recompute each centroid as the mean of the samples assigned to it.

    Args:
        X: Array of shape (m, n), one sample per row.
        idx: Cluster index of every sample; any shape that flattens to
            length m (for example (m,) or (m, 1)).
        K: Number of clusters.

    Returns:
        Array of shape (K, n). Row k is the mean of the samples labelled k.
        A cluster with no samples is re-seeded with a randomly chosen row of
        X; if X has no rows at all, the centroid stays at zero.
    """
    X = np.asarray(X)
    labels = np.ravel(idx)
    centroids = np.zeros((K, X.shape[1]))
    for k in range(K):
        members = X[labels == k]
        if members.shape[0] > 0:
            centroids[k] = members.mean(axis=0)
        elif X.shape[0] > 0:
            # Duplicate seeds (identical pixel values) leave one cluster empty;
            # averaging zero rows would give NaN, so restart it on a data point.
            centroids[k] = X[np.random.randint(X.shape[0])]
    return centroids
def kMeansInitCentroids(X, K, iterations=10):
    """Run K-means from random seeds and return the final centroids.

    Args:
        X: Sample matrix, one sample per row.
        K: Number of clusters.
        iterations: Number of assignment/update passes (default 10).

    Returns:
        The centroids produced by ``computeMeans`` on the last pass.
    """
    # Seed with the caller's K. The seed count was previously pinned to 16,
    # which disagreed with the K given to computeMeans for any other value.
    centroids = initialize(X, K)
    for step in range(1, iterations + 1):
        # Progress message: number of the current iteration.
        print(u'迭代計算次數:%d' % step)
        idx = findClosestCentroids(X, centroids)
        centroids = computeMeans(X, idx, K)
    return centroids
def plot_data(X):
    """Show the original image next to its 16-colour K-means compression.

    Reads the module-level ``img`` (for the left panel) and ``img_data``
    (for the output shape) in addition to the argument.

    Args:
        X: Pixel matrix with one pixel per row, as clustered by
            ``kMeansInitCentroids``.
    """
    plt.figure()

    # Left panel: the source image, converted from BGR to RGB for matplotlib.
    plt.subplot(1, 2, 1)
    plt.imshow(cv.cvtColor(img, cv.COLOR_BGR2RGB))
    plt.title('original')
    plt.xticks([])
    plt.yticks([])

    centroids = kMeansInitCentroids(X, K=16)
    print('K-Means運行結束\n')
    print('壓縮圖片...\n')

    # Look up the cluster of every pixel and replace the pixel value with
    # the value of that cluster's centroid.
    idx = findClosestCentroids(X, centroids)
    compress_img = np.zeros((X.shape[0], centroids.shape[1]))
    for k, centre in enumerate(centroids):
        compress_img[idx == k, :] = centre
    compress_img = compress_img.reshape(img_data.shape)

    # OpenCV needs a uint8 array for the colour conversion below.
    compress_img = compress_img.astype("uint8")

    # Right panel: the compressed image.
    plt.subplot(1, 2, 2)
    plt.imshow(cv.cvtColor(compress_img, cv.COLOR_BGR2RGB))
    plt.title('Compress')
    plt.xticks([])
    plt.yticks([])
    plt.show()
# Cluster the pixels and display the original and compressed images side by side.
plot_data(X)
# 三。圖像PCA (Part 3: PCA on 2-D data)
import numpy as np
from scipy.io import loadmat
import matplotlib.pyplot as plt
# Load the PCA exercise dataset and use its 'X' entry as the sample matrix.
# NOTE: PCA() below standardises this array in place via feature_normalize.
data = loadmat('ex7data1.mat')
X = data['X']
def feature_normalize(X):
    """Standardise every column of X to zero mean and unit variance, in place.

    The result is written back into X and that same array is returned.
    The in-place write is deliberate: ``project_data`` multiplies the same X
    after ``PCA`` has normalised it.

    Args:
        X: 2-D NumPy array of shape (m, n). It should have a float dtype,
            because the standardised values are stored back into it.

    Returns:
        X itself, with each column shifted by its mean and divided by its
        population standard deviation. Constant columns become all zeros.
    """
    mu = np.mean(X, axis=0)
    sigma = np.std(X, axis=0)
    # A constant column has zero spread; dividing by it would turn the whole
    # column into NaN, so leave such columns merely centred.
    sigma = np.where(sigma == 0, 1.0, sigma)
    X[:] = (X - mu) / sigma
    return X
def PCA(X):
    """Decompose the covariance matrix of the standardised samples with SVD.

    X is first passed through ``feature_normalize``, which writes the
    standardised values back into X.

    Args:
        X: Sample matrix of shape (m, n).

    Returns:
        Tuple ``(U, S, V)``: the three outputs of ``np.linalg.svd`` applied
        to ``X.T @ X / m``.
    """
    normalized = feature_normalize(X)
    n_samples = normalized.shape[0]
    covariance = normalized.T.dot(normalized) / n_samples
    left, singular_values, right = np.linalg.svd(covariance)
    return left, singular_values, right
def project_data(X, K):
    """Project the samples onto the first K principal directions.

    ``PCA(X)`` is evaluated first; it standardises X in place, so the
    product below is taken with the standardised samples.

    Args:
        X: Sample matrix of shape (m, n).
        K: Number of leading columns of U to keep.

    Returns:
        Array of shape (m, K) holding the projected samples.
    """
    principal_axes = PCA(X)[0]
    leading_axes = principal_axes[:, :K]
    return X.dot(leading_axes)
def recover_data(U, Z, K):
    """Map projected samples back into the original feature space.

    Args:
        U: Matrix whose columns are the principal directions (first output
            of ``PCA``).
        Z: Projected samples of shape (m, K).
        K: Number of leading columns of U that were used for the projection.

    Returns:
        Array of shape (m, n): ``Z`` multiplied by the transpose of the
        first K columns of U.
    """
    # Use the U that was passed in. Re-running PCA on the module-level X here
    # discarded the argument and tied the function to a global.
    U_reduce = U[:, :K]
    return Z.dot(U_reduce.T)
def plot_data(X):
    """Plot the samples, their 1-D PCA reconstruction, and the links between them.

    Panel 1 is drawn before ``PCA(X)`` runs; panels 2 and 3 are drawn after
    it, by which point X has been standardised in place by
    ``feature_normalize``.

    Args:
        X: Sample matrix; columns 0 and 1 are used as plot coordinates.
    """
    plt.figure()

    # Panel 1: the original data.
    plt.subplot(1, 3, 1)
    plt.scatter(X[:, 0], X[:, 1], c='blue', marker='o')
    plt.title('original')

    principal_axes = PCA(X)[0]
    Z = project_data(X, K=1)
    X_rec = recover_data(principal_axes, Z, K=1)

    # Panel 2: the data recovered from the 1-D projection.
    plt.subplot(1, 3, 2)
    plt.scatter(X_rec[:, 0], X_rec[:, 1], c='red', marker='+')
    plt.title('from 1D to 2D')
    plt.xticks([-2, 2])
    plt.yticks([-2, 2])

    # Panel 3: visualise the mapping by joining each sample to its recovery.
    plt.subplot(1, 3, 3)
    plt.scatter(X[:, 0], X[:, 1], facecolors='none', c='blue', marker='o', s=20)
    plt.scatter(X_rec[:, 0], X_rec[:, 1], facecolors='none', c='red', marker='+', s=20)
    for sample, recovered in zip(X, X_rec):
        plt.plot((sample[0], recovered[0]), (sample[1], recovered[1]), 'k--')
    plt.title('draw line')
    plt.show()
# Run the 2-D PCA demo on the samples loaded above.
plot_data(X)
# 四。人臉 (Part 4: Faces)
from scipy.io import loadmat
import numpy as np
import matplotlib.pyplot as plt
# Load the faces dataset and use its 'X' entry as the sample matrix.
# Each row is later reshaped into one image tile by visual().
data = loadmat('ex7faces.mat')
X = data['X']
def _tile_examples(data, pad=1):
    """Arrange each row of ``data`` as one tile in a padded image grid."""
    m, n = data.shape
    # Builtin int() replaces the np.int alias, which newer NumPy removed.
    example_width = int(np.round(np.sqrt(n)))
    example_height = int(n / example_width)
    display_rows = int(np.floor(np.sqrt(m)))
    display_cols = int(np.ceil(m / display_rows))

    # Padding stays at -1, the lowest value a scaled tile can take.
    display_array = -np.ones((pad + display_rows * (example_height + pad),
                              pad + display_cols * (example_width + pad)))

    # Iterating over range(m) stops exactly at the last example; the earlier
    # `> m` guard let an index equal to m through.
    for curr_ex in range(m):
        j, i = divmod(curr_ex, display_cols)
        row = data[curr_ex, :]
        # Scale by the row being drawn (not by a module-level array) so that
        # projected or recovered data is normalised by its own range.
        max_val = np.max(np.abs(row))
        if max_val == 0:
            max_val = 1.0  # an all-zero row stays all zero instead of NaN
        top = pad + j * (example_height + pad)
        left = pad + i * (example_width + pad)
        # order='F' fills the tile column by column, which is the same
        # orientation the previous index-broadcast assignment produced.
        display_array[top:top + example_height, left:left + example_width] = (
            row.reshape((example_height, example_width), order='F') / max_val
        )
    return display_array


def visual(data):
    """Display the rows of ``data`` as a grid of grayscale image tiles.

    Args:
        data: Array of shape (m, n); every row is reshaped into a tile of
            about sqrt(n) by sqrt(n) pixels and scaled by its largest
            absolute value.
    """
    display_array = _tile_examples(data, pad=1)
    plt.figure()
    plt.imshow(display_array, cmap='gray', extent=[-1, 1, -1, 1])
    plt.axis('off')
    plt.show()
def feature_normalize(X):
    """Standardise every column of X to zero mean and unit variance, in place.

    The result is written back into X and that same array is returned.
    The in-place write is deliberate: ``project_data`` multiplies the same X
    after ``PCA`` has normalised it.

    Args:
        X: 2-D NumPy array of shape (m, n). It should have a float dtype,
            because the standardised values are stored back into it.

    Returns:
        X itself, with each column shifted by its mean and divided by its
        population standard deviation. Constant columns become all zeros.
    """
    mu = np.mean(X, axis=0)
    sigma = np.std(X, axis=0)
    # A constant column (e.g. a pixel identical in every image) has zero
    # spread; dividing by it would turn the column into NaN.
    sigma = np.where(sigma == 0, 1.0, sigma)
    X[:] = (X - mu) / sigma
    return X
def PCA(X):
    """Decompose the covariance matrix of the standardised samples with SVD.

    X is first passed through ``feature_normalize``, which writes the
    standardised values back into X.

    Args:
        X: Sample matrix of shape (m, n).

    Returns:
        Tuple ``(U, S, V)``: the three outputs of ``np.linalg.svd`` applied
        to ``X.T @ X / m``.
    """
    normalized = feature_normalize(X)
    n_samples = normalized.shape[0]
    covariance = normalized.T.dot(normalized) / n_samples
    left, singular_values, right = np.linalg.svd(covariance)
    return left, singular_values, right
def project_data(X, K):
    """Project the samples onto the first K principal directions.

    ``PCA(X)`` is evaluated first; it standardises X in place, so the
    product below is taken with the standardised samples.

    Args:
        X: Sample matrix of shape (m, n).
        K: Number of leading columns of U to keep.

    Returns:
        Array of shape (m, K) holding the projected samples.
    """
    principal_axes = PCA(X)[0]
    leading_axes = principal_axes[:, :K]
    return X.dot(leading_axes)
def recover_data(U, Z, K):
    """Map projected samples back into the original feature space.

    Args:
        U: Matrix whose columns are the principal directions (first output
            of ``PCA``).
        Z: Projected samples of shape (m, K).
        K: Number of leading columns of U that were used for the projection.

    Returns:
        Array of shape (m, n): ``Z`` multiplied by the transpose of the
        first K columns of U.
    """
    # Use the U that was passed in. Re-running PCA on the module-level X here
    # discarded the argument and repeated a full SVD on every call.
    U_reduce = U[:, :K]
    return Z.dot(U_reduce.T)
# Show the first 100 rows of X as an image grid.
visual(X[0:100,:])
# Images after dimensionality reduction (PCA standardises X in place first).
U, S, V = PCA(X)
Z = project_data(X,K=100)
visual(Z[0:100,:])
# Recovered images
X_rec = recover_data(U, Z, K=100)
visual(X_rec[0:100,:])