渣碩被畢設逼到最後,真的是逼急了。毫無信心地嘗試用sklearn的SVM做了個CU劃分問題的分類,週期很長,中間差點放棄,但是一想到如果放棄的話自己畢設只能寫出30幾頁,就堅持做完了。最後的效果未達到超過所有state-of-art的水平,但估計可以踩着畢設線畢業了。
1、數據集與數據預處理
數據集使用的是CPIH,一個專門爲intra編碼下的CU劃分問題建立的數據集。github鏈接如下。之前是看了他們的用CNN做CU劃分快速算法的paper,年前一直希望可以重現論文,遇到了很多困難。雖然最後還是沒做出效果因爲訓練結果真的不收斂。感謝paper作者北航小哥哥郵件裏熱心的回覆。https://github.com/HEVC-Projects/CPIH
下載數據集並按照說明提取了36個data集,是4個QP與3個CU尺寸的組合,再分別對應Train Test Valid。
之後用了個python腳本,將數據解析並將特徵提出來。
這裏主要提取了3個特徵,Variation,Block Flatness,SubVariation(不再詳細描述)。
腳本如下:parseData 是讀取數據集的數據、提取特徵和 label,然後寫到文件裏的核心函數。在寫一些特徵的時候要小心一點,不要讓索引越界。其餘沒什麼大問題了。
import re
import string
import os
import struct
import numpy as np
# get equal numbers of Positive Samples and Negtive Samples
def getSubBlock(cuData, cuSize):
    """Split a flattened cuSize x cuSize luma block into its four quadrants.

    Parameters
    ----------
    cuData : 1-D array-like of length cuSize*cuSize, row-major pixel order.
    cuSize : side length of the (square) CU; assumed even.

    Returns
    -------
    (sub0, sub1, sub2, sub3) : four 1-D numpy arrays of length
    (cuSize//2)**2 — top-left, top-right, bottom-left, bottom-right
    quadrants, each in row-major order (same ordering the original
    index-walking loop produced).  The arrays keep the input dtype;
    downstream .var() promotes to float64 either way.
    """
    half = cuSize // 2  # integer half-size; avoids float sizes from '/'
    block = np.asarray(cuData).reshape(cuSize, cuSize)
    sub0 = block[:half, :half].ravel()  # top-left
    sub1 = block[:half, half:].ravel()  # top-right
    sub2 = block[half:, :half].ravel()  # bottom-left
    sub3 = block[half:, half:].ravel()  # bottom-right
    return sub0, sub1, sub2, sub3
def getSCCD(cuData, size):
    """Sub-CU Content Difference: the variance of the four quadrant
    variances.  A large value means the quadrants differ a lot in
    texture, which hints that the CU should be split."""
    quadrants = getSubBlock(cuData, size)
    quad_vars = np.array([q.var() for q in quadrants])
    # var() of the four per-quadrant variances == mean of their squared
    # deviations from the mean variance, exactly the original formula.
    return quad_vars.var()
def calBlockFlatness(cuData):
    """Block Flatness feature of a CU.

    bf = (sum / sum_of_squares) * (sum / n); large for flat (uniform)
    blocks, small for textured ones.

    Parameters
    ----------
    cuData : 1-D numpy array of pixel values (uint8 in this pipeline).

    Notes
    -----
    Squares are accumulated in int64 to avoid uint8 overflow, matching
    the original element-by-element loop.  An all-zero block divides by
    zero, same as the original code did.
    """
    values = cuData.astype(np.int64)
    total = float(values.sum())
    sq_total = (values * values).sum()  # vectorized; replaces the Python loop
    return (total / sq_total) * (total / cuData.size)
def getCuData(cuData, label):
    """Serialize raw pixel values plus the label as one text line.

    Returns "p0 p1 ... pN-1 label\n"; for an empty array it degenerates
    to "label\n", exactly like the original loop did.  The join replaces
    the O(n^2) repeated string concatenation.
    """
    parts = [str(v) for v in cuData]
    parts.append(str(label))
    return " ".join(parts) + "\n"
def parseData(readData, writelabel, dirWritePicPath, size):
    """Read labelled CU samples from a binary stream and write one
    "SCCD Var BF label" text line per kept sample.

    Stream layout per sample: 1 label byte (0x00 = non-split,
    0x01 = split) followed by size*size luma bytes.  At most 8000
    positive and 8000 negative samples are kept so the classes stay
    balanced; surplus samples still have their payload consumed so the
    stream position stays aligned.

    dirWritePicPath is unused but kept for caller compatibility.

    Fixes vs. the original:
    - removed `i = i + 1`, which raised NameError (`i` was never set);
    - EOF is now detected: read(1) returns b"" at end of stream, and the
      old check compared against the str "" AFTER the byte-value check,
      so EOF always printed "label Error!" instead of "End Processing!";
    - dropped the unused cuMean and the commented-out chunk variants.
    """
    samplePos = 8000
    sampleNeg = 8000
    while samplePos > 0 or sampleNeg > 0:
        flag = readData.read(1)
        if flag == b"":
            # EOF must be tested before the value check.
            print("End Processing!")
            break
        if flag not in (b'\x00', b'\x01'):
            print("label Error!")
            break
        if flag == b'\x01':
            if samplePos <= 0:
                readData.read(size * size)  # quota full: skip payload
                continue
            label = 1
            samplePos -= 1
        else:
            if sampleNeg <= 0:
                readData.read(size * size)  # quota full: skip payload
                continue
            label = 0
            sampleNeg -= 1
        cuData = np.frombuffer(readData.read(size * size), dtype=np.uint8)
        SCCD = getSCCD(cuData, size)
        cuVar = cuData.var()
        cuBF = calBlockFlatness(cuData)
        writelabel.write(str(SCCD) + " " + str(cuVar) + " " + str(cuBF)
                         + " " + str(label) + "\n")
    # Remaining quotas; both 0 means the set was fully balanced.
    print(samplePos)
    print(sampleNeg)
def parseSet(dirReadData, seqName, cuSize):
    """Parse one raw sample file into '<seqName>_labels.data'.

    Uses `with` so both files are closed even if parsing raises — the
    original plain open()/close() leaked the handles on any exception.
    """
    base = '/Users/mengwang/Documents/MyCode/Machine Learning/cuSplit/'
    dirWritelabel = base + seqName + '_labels.data'
    dirWriteDataPath = base + seqName + '/'
    with open(dirReadData, 'rb') as readData, \
            open(dirWritelabel, 'w') as writelabel:
        parseData(readData, writelabel, dirWriteDataPath, cuSize)
def parseWrapper(cuSize, qp, dataSet):
    """Resolve the raw .dat path for one (CU size, QP, split) combination
    and hand it to parseSet."""
    seqName = 'CU' + str(cuSize) + '_QP' + str(qp) + '_' + dataSet
    print("Extracting " + seqName + "...")
    dirReadData = ('/Users/mengwang/Documents/MyCode/Machine Learning/cuSplit/data/'
                   'CU{0}Samples_AI_CPIH_768_1536_2880_4928_qp{1}_{2}.dat'
                   .format(cuSize, qp, dataSet))
    parseSet(dirReadData, seqName, cuSize)
if __name__ == "__main__":
    # Extract the 64x64 / QP22 train and test splits.
    for split in ("Train", "Test"):
        parseWrapper(64, 22, split)
提取的數據,具體的第一列是SCCD 第二列是Var 第三列是BF 第四列是劃分1/不劃分0
2、SVM分類
使用了sklearn的svm.LinearSVC,主要是考慮到算法複雜度、和集成難易程度(這一點我考慮的不對,C++版的libsvm可以直接集成過去的)選用線性SVM核函數。
下面的代碼是載入數據,並做標準化處理。
def _load_split(path, tag):
    """Load 'SCCD Var BF label' rows from a text file.

    Returns (features, labels) as float arrays of shape (n, 3) and
    (n, 1), and prints per-feature mean/std for the write-up.
    """
    data = np.loadtxt(path, dtype=str, delimiter=' ').astype(float)
    x, y = np.split(data, (3,), axis=1)
    print(x.mean(axis=0), 'mean of ' + tag)
    print(x.std(axis=0), 'std of ' + tag)
    return x, y

base = '/Users/mengwang/Documents/MyCode/Machine Learning/cuSplit/'
x_train, y_train = _load_split(base + 'CU64_QP22_Train_labels.data', 'x_train')
x_test, y_test = _load_split(base + 'CU64_QP22_Test_labels.data', 'x_test')

# Fit the standardizer on the TRAINING set only and reuse its statistics
# for the test set.  Scaling each split with its own mean/std (the
# original preprocessing.scale-per-split code) leaks test statistics and
# disagrees with the encoder-side mapping, which subtracts the training
# mean and divides by the training std.
scaler = preprocessing.StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)
下面開始訓練和打出結果
# Linear SVM with equal class penalties to start with; class_weight is
# the knob later tuned to trade split/non-split precision against each
# other.
clf = svm.LinearSVC(random_state=3, multi_class='ovr', C=1, class_weight={1: 1, 0: 1})
clf.fit(x_train, y_train.ravel())

def _report(split_name, features, truth):
    """Predict one split, print its accuracy, return the predictions."""
    predicted = clf.predict(features)
    # accuracy_score is only meaningful for classification problems.
    print(accuracy_score(truth, predicted), 'accuracy in ' + split_name + ' set')
    return predicted

y_pred_train = _report('train', x_train, y_train)
y_pred = _report('test', x_test, y_test)

# Inspect the learned model: hyper-parameters, weights and bias.
print(clf.get_params())
print(clf.coef_)
print(clf.intercept_)
print(classification_report(y_test, y_pred, target_names=['non-split', 'split']))
後面使用兩個特徵來說明,特徵空間是有重疊的,選用的特徵是Var和SubVar,取log10以後畫散點圖,代碼和圖如下:
# Decision surface of the trained classifier over the 2-D feature plane,
# with the training points scattered on top.
pad = 1
f0_lo, f0_hi = x_train[:, 0].min() - pad, x_train[:, 0].max() + pad
f1_lo, f1_hi = x_train[:, 1].min() - pad, x_train[:, 1].max() + pad
xx, yy = np.meshgrid(np.arange(f0_lo, f0_hi, 0.1),
                     np.arange(f1_lo, f1_hi, 0.1))
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
plt.xlabel("log10(SubVar)")
plt.ylabel("log10(Var)")
plt.contourf(xx, yy, Z, alpha=0.3)
plt.scatter(x_train[:, 0], x_train[:, 1], c=y_train.ravel(), alpha=0.7,
            marker='+', linewidths=10)
plt.show()
可以看出明顯的重疊啊,劃分誤差大編碼性能損失很嚴重的。所以如果用兩個SVM來分類的話,就像是這樣:
那就完美許多了。這裏是通過調整初始化模型的參數中的“class_weight”
冒號前代表類的標籤“1” “0”,冒號後對應的是其類的懲罰權重;默認情況下設爲1:1
如果設爲1:2, 0:1則代表“劃分”類的懲罰權重加大,意味着如果“不劃分”的類誤判爲“劃分”,Loss是翻倍的,所以{1:2, 0:1}結果相比於{1:1, 0:1},“不劃分”類的精確度是提高了的!同理,爲了提高“劃分”精度,可以給標籤0更大的權值。
之前對plot不太熟,現在看起來很套路嘛。如果想畫出剛剛那個“三分類的圖”,其實就是初始化兩個新的帶權重的SVM:clf和clf0,畫圖的代碼如下。
# Overlay the decision regions of the two weighted SVMs (clf and clf0)
# on one scatter plot, producing the "three-zone" picture.
lo0, hi0 = x_train[:, 0].min() - 1, x_train[:, 0].max() + 1
lo1, hi1 = x_train[:, 1].min() - 1, x_train[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(lo0, hi0, 0.1),
                     np.arange(lo1, hi1, 0.1))
mesh = np.c_[xx.ravel(), yy.ravel()]
# Draw clf first, then clf0 — same layering as the original calls.
for model in (clf, clf0):
    regions = model.predict(mesh).reshape(xx.shape)
    plt.contourf(xx, yy, regions, alpha=0.3)
plt.scatter(x_train[:, 0], x_train[:, 1], c=y_train.ravel(), alpha=0.7,
            marker='+', linewidths=10)
plt.xlabel("log10(SubVar)")
plt.ylabel("log10(Var)")
plt.show()
3、HM中的映射
300行代碼;寫的是有點點亂;
因爲是不同的QP和不同的CU尺寸組合下的雙SVM,因此一共有4*3*2=24套係數;
需要注意的就是特徵同樣需要標準化,減去均值,除以標準差:X = (X_org - X_mean) / X_std;(sklearn 的 preprocessing.scale 做的就是除以標準差,而不是方差)
均值方差使用的就是各種特徵在訓練集裏的均值方差;
最後發現每組雙SVM的係數接近於 僅需調整截距,斜率係數只是微調;這跟我最初只使用一個SVM然後通過加減1,手動調節上下邊界線的做法基本是一致的,當初沒想明白怎麼回事;
最終實現了全I幀0.8%的BD-rate loss 40%多的加速,希望可以畢業。
4、總結
從不知道方案是否可行、不會提數據、提取特徵遇到bug、不知道數據歸一化、訓練完SVM只有70%準確率這樣的SVM有沒有達到最優、還有哪裏是可以優化的空間、加到HM代碼裏優化極限又是什麼樣的、論文該怎麼寫等等都很心虛;