Kmeans分類(角度分類)
上次的氣象雲分類還在更新中【點擊這裏】,先記錄一下這次在做攝像頭角度分類用的算法和代碼;
主要是針對攝像頭的角度不同使得產生的圖片也有差異,這裏主要使用的是Kmeans算法進行分類,具體如下:
1、先用test.py生成相應的"knn_res1.csv",之後用class.py文件根據該csv文件對圖片進行分類
2、這裏的Kmeans算法直接使用sklearn模塊中自帶的KMeans實現。進行處理時,先用SIFT提取每個圖片的特徵,之後用Kmeans算法進行聚類,得到相應的labels和聚類中心(這裏選取的聚類中心個數爲6,分類的種數根據自己要分多少類而定)
代碼:
# -*- encoding:utf-8 -*-
__date__ = '19/09/11'
# Reference note kept as a bare string literal (it is never assigned or
# used). It lists OpenCV interpolation modes under their CV_INTER_* names;
# the resize call in knn_detect uses the cv2.INTER_CUBIC spelling.
# English summary of the note:
#   CV_INTER_NN     - nearest-neighbour interpolation
#   CV_INTER_LINEAR - bilinear interpolation (used by default)
#   CV_INTER_AREA   - resampling by pixel-area relation; avoids ripple
#                     artefacts when shrinking, behaves like CV_INTER_NN
#                     when enlarging
#   CV_INTER_CUBIC  - bicubic interpolation
'''
CV_INTER_NN - 最近鄰插值,
CV_INTER_LINEAR - 雙線性插值 (缺省使用)
CV_INTER_AREA - 使用象素關係重採樣。當圖像縮小時候,該方法可以避免波紋出現。當圖像放大時,類似於 CV_INTER_NN 方法..
CV_INTER_CUBIC - 立方插值
'''
import os, codecs
import cv2
import numpy as np
from sklearn.cluster import KMeans
import pandas as pd
import shutil
def get_file_name(path):
    """Return the paths of all non-hidden entries directly inside *path*.

    Args:
        path: Directory to list.

    Returns:
        A list of ``os.path.join(path, name)`` strings, one for every entry
        whose name does not start with a dot, in the order ``os.listdir``
        yields them.
    """
    return [
        os.path.join(path, name)
        for name in os.listdir(path)
        if not name.startswith('.')
    ]
def knn_detect(file_list, cluster_nums, randomState=None):
    """Cluster images with KMeans using a SIFT-derived feature.

    Each image is read, resized to 32x32, converted to grayscale and passed
    to SIFT. Images that cannot be read, or for which SIFT returns no
    descriptors, are dropped from ``file_list`` **in place** so that after
    the call ``file_list[i]`` corresponds to ``labels[i]``.

    Args:
        file_list: Mutable list of image paths; pruned in place.
        cluster_nums: Number of clusters, passed to ``KMeans`` as
            ``n_clusters``.
        randomState: Passed to ``KMeans`` as ``random_state``.

    Returns:
        ``(labels, cluster_centers)`` taken from the fitted ``KMeans`` model.

    Raises:
        ValueError: If no image produced a feature.
    """
    # SIFT is exposed as cv2.SIFT_create in newer OpenCV builds and as
    # cv2.xfeatures2d.SIFT_create in older contrib builds; accept either.
    if hasattr(cv2, "SIFT_create"):
        sift = cv2.SIFT_create()
    else:
        sift = cv2.xfeatures2d.SIFT_create()

    features = []
    kept_files = []
    # Iterate over a snapshot. The original removed entries from the very
    # list it was looping over, which makes the loop skip the element after
    # each removal and leaves file_list out of step with the labels.
    for file in list(file_list):
        print(file)
        img = cv2.imread(file)
        if img is None:
            # cv2.imread yields None for unreadable or non-image files.
            continue
        img = cv2.resize(img, (32, 32), interpolation=cv2.INTER_CUBIC)
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        _, des = sift.detectAndCompute(gray, None)
        if des is None or len(des) == 0:
            continue
        # NOTE(review): reshape(-1, 1)[0] keeps only the first scalar of the
        # first descriptor, so every image is described by a single number.
        # Behaviour is preserved here; confirm whether a full descriptor
        # (e.g. des[0]) was intended.
        features.append(des.reshape(-1, 1)[0].tolist())
        kept_files.append(file)

    # Publish the pruned list through the caller's own list object.
    file_list[:] = kept_files

    if not features:
        raise ValueError("no SIFT features could be extracted from any image")

    input_x = np.array(features)
    kmeans = KMeans(n_clusters=cluster_nums, random_state=randomState).fit(input_x)
    return kmeans.labels_, kmeans.cluster_centers_
def res_fit(filenames, labels):
    """Pair each file's base name with its cluster label.

    Args:
        filenames: Iterable of file paths.
        labels: Sequence of labels, positionally aligned with ``filenames``.
            Extra trailing labels are ignored.

    Returns:
        A list of ``[base_name, label]`` two-element lists, in input order.

    Raises:
        IndexError: If there are fewer labels than filenames.
    """
    # os.path.basename also copes with backslash separators on Windows,
    # which a plain split('/') does not.
    names = [os.path.basename(path) for path in filenames]
    if len(labels) < len(names):
        raise IndexError(
            "got {} labels for {} files".format(len(labels), len(names))
        )
    return [[name, label] for name, label in zip(names, labels)]
def save(path, filename, data):
    """Write filename/label pairs to a UTF-8, tab-separated text file.

    One ``"<name>\\t<label>\\n"`` line is written per pair.

    Args:
        path: Directory in which the file is created.
        filename: Name of the output file inside ``path``.
        data: Either a mapping of name -> label, or an iterable of
            ``(name, label)`` pairs such as the list ``res_fit`` returns.
    """
    # Accept both a dict and a list of pairs; the original only handled a
    # dict and raised AttributeError on res_fit's list output.
    pairs = data.items() if hasattr(data, "items") else data
    file = os.path.join(path, filename)
    with codecs.open(file, 'w', encoding='utf-8') as fw:
        for f, l in pairs:
            fw.write("{}\t{}\n".format(f, l))
def main(sample_dir="C:/你的路徑/classes/samples_1/", cluster_nums=6,
         out_csv="knn_res1.csv"):
    """Cluster the images in ``sample_dir`` and write the result as CSV.

    Args:
        sample_dir: Directory holding the images to cluster.
        cluster_nums: Number of clusters to form.
        out_csv: Path of the CSV file to write.

    The CSV is written with pandas' default settings. The follow-up
    sorting script skips its first row and reads the file name from
    column 1 and the label from column 2, so that layout must be kept.
    """
    path_filenames = get_file_name(sample_dir)
    # knn_detect prunes path_filenames in place, keeping it aligned with
    # the returned labels; the cluster centres are not needed here.
    labels, _ = knn_detect(path_filenames, cluster_nums)
    rows = res_fit(path_filenames, labels)
    pd.DataFrame(rows).to_csv(out_csv)


if __name__ == "__main__":
    main()
之後用生成的csv文件對數據集進行分類
import csv
import shutil
import os
# Move every image listed in knn_res1.csv into a sub-folder of target_path
# named after its cluster label.
target_path = "C:/你的路徑/classes/"
original_path = "C:/你的路徑/classes/samples_1/"
# newline="" is the mode the csv module documents for files given to
# csv.reader.
with open('C:/你的路徑/classes/knn_res1.csv', "rt", encoding="utf-8", newline="") as csvfile:
    reader = csv.reader(csvfile)
    next(reader, None)  # skip the first (header) row
    for row in reader:
        if len(row) < 3:
            # Blank or truncated line: nothing to move.
            continue
        # Column 0 is skipped, column 1 is the file name, column 2 is the
        # label that names the destination folder.
        filename, label = row[1], row[2]
        class_dir = os.path.join(target_path, label)
        # exist_ok replaces the duplicated exists()/else branches.
        os.makedirs(class_dir, exist_ok=True)
        shutil.move(os.path.join(original_path, filename), class_dir)
之後在同目錄下就可以看到已經分好類的6個文件夾了