文章目錄
1.複寫keras_faster rcnn
1.1 解析xml信息
import xml.etree.ElementTree as ET
def parse_label(xml_file):
    """Parse one Pascal-VOC style XML annotation.

    Args:
        xml_file: Path or file object accepted by ``ET.parse``.

    Returns:
        A tuple ``(image_name, (width, height), category, ground_truth_box)``.
        ``image_name`` is the text of ``<filename>``; ``width`` and ``height``
        are the text of ``<size>/<width>`` and ``<size>/<height>`` (strings,
        exactly as stored in the XML). ``category`` has one entry per
        ``<object>`` and ``ground_truth_box`` has one
        ``[xmin, ymin, xmax, ymax]`` list of strings per ``<object>``, in the
        same order.
    """
    root = ET.parse(xml_file).getroot()

    # These three values occur once per image.
    size = root.find('size')
    width = size.find('width').text
    height = size.find('height').text
    image_name = root.find('filename').text

    category = []
    ground_truth_box = []
    for obj in root.findall('object'):  # an image may hold several objects
        # Read only the object's direct <name> child. Walking the subtree
        # with iter('name') would also collect nested names such as
        # <part><name>, leaving more categories than boxes.
        name_node = obj.find('name')
        category.append(name_node.text if name_node is not None else None)

        bndbox = obj.find('bndbox')
        ground_truth_box.append(
            [bndbox.find(tag).text for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
        )
    return image_name, (width, height), category, ground_truth_box
import glob  # glob.glob returns the matched paths, directory prefix included

# Parse every annotation file in the folder and print each result.
annotation_paths = glob.glob('/Users/mikegao/Desktop/Annotation/*')
for name in annotation_paths:
    parsed = parse_label(name)
    print (parsed,'\n')
或
:
import pandas as pd

# Build ten sample records; each dict becomes one row of the table.
list_table = [{'name': i + 1, "age": i} for i in range(10, 20)]
print (list_table)

# The DataFrame is built from list_table; `columns` fixes the column order.
data_frame = pd.DataFrame(data=list_table, columns=['age', 'name'])
# mode='a' appends to 321.csv, and header=True writes the header row each time.
data_frame.to_csv('321.csv', index=False, mode='a', header=True)
輸出的 list_table:
輸出的 data_frame:
1.2 Anchor生成
下圖box[0]爲x,box[1]爲y
結果爲:(9,256), 所以有256個centerx(中心點),每個裏面有9個框,256*9=2304
1.3 多輸出多輸入(函數式)
1.4 resnet
下圖stage1裏conv和maxpool都會做一次下采樣
import keras.layers as KL
from keras.models import Model
import keras.backend as K
import tensorflow as tf
from keras.utils import np_utils
from keras.utils.vis_utils import plot_model
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import os
%matplotlib inline
from keras.datasets import mnist
#創建resnet網絡結構
def building_block(filters,block):
    """Return a function that applies one bottleneck residual block.

    Args:
        filters: Channel count of the first two convolutions; the block
            outputs ``4 * filters`` channels. It is a parameter because it
            grows from stage to stage.
        block: Index of the block inside its stage. Index 0 uses stride 2 and
            a convolutional shortcut; any other index uses stride 1 and
            passes the input through unchanged as the shortcut.

    Returns:
        A callable ``f(x)`` mapping an input tensor to the block output.
    """
    first_in_stage = (block == 0)
    stride = 2 if first_in_stage else 1

    def f(x):
        # Main path: 1x1 conv (strided) -> 3x3 conv -> 1x1 expansion.
        # The article notes the first kernel was originally (3, 3), which
        # shrank the 28x28 input away, hence (1, 1).
        main = KL.Conv2D(filters=filters, kernel_size=(1, 1), strides=stride)(x)
        main = KL.BatchNormalization(axis=3)(main)  # axis 3 = channel axis
        main = KL.Activation('relu')(main)

        # No stride here; padding='same' keeps the spatial size.
        main = KL.Conv2D(filters=filters, kernel_size=(3, 3), padding='same')(main)
        main = KL.BatchNormalization(axis=3)(main)
        main = KL.Activation('relu')(main)

        main = KL.Conv2D(filters=4 * filters, kernel_size=(1, 1))(main)
        main = KL.BatchNormalization(axis=3)(main)

        # Shortcut path: projection for the first block, identity otherwise.
        if first_in_stage:
            shortcut = KL.Conv2D(filters=4 * filters, kernel_size=(1, 1), strides=stride)(x)
            shortcut = KL.BatchNormalization()(shortcut)
        else:
            shortcut = x

        # Merge both paths with an Add layer, then apply the final ReLU.
        merged = KL.Add()([main, shortcut])
        return KL.Activation('relu')(merged)

    return f
#resnet fp提取
def ResNet_Extractor(X_train, Y_train,X_test,Y_test):
    """Build, train, save and plot a small ResNet-style classifier.

    The network takes ``(28, 28, 1)`` inputs, stacks two stages of two
    ``building_block`` units each (filters doubled per stage) and ends in a
    10-way softmax. It trains for 6 epochs with batch size 200, writes the
    architecture diagram to ``resMnist6-9-2.png`` and the model to
    ``./mnistRES2.h5``, then plots the loss and accuracy curves.

    Args:
        X_train, Y_train: Training inputs and one-hot labels.
        X_test, Y_test: Data passed to ``fit`` as ``validation_data``.

    Returns:
        The trained Keras ``Model``.
    """
    # Head: customise the top input here.
    inputs = KL.Input([28, 28, 1])
    x = KL.Conv2D(filters=64, kernel_size=(3, 3), padding='same')(inputs)
    x = KL.BatchNormalization(axis=3)(x)
    x = KL.Activation('relu')(x)

    # Body: each entry of `block` is the number of blocks in one stage.
    filters = 64
    block = [2, 2]
    for i, block_num in enumerate(block):
        print ('---stage--', str(i) ,'---')
        for block_id in range(block_num):
            print('---block--', str(block_id) ,'---')
            x = building_block(filters=filters, block=block_id)(x)
        filters *= 2  # double the filter count for every stage
    # At this point x is the plain feature map, without the tail below.

    # Tail: classification output.
    x = KL.AveragePooling2D(pool_size=(2, 2))(x)
    x = KL.Flatten()(x)
    x = KL.Dense(units=10, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=x)
    # summary() prints by itself and returns None, so it is not wrapped in print().
    model.summary()
    plot_model(model, to_file='resMnist6-9-2.png', show_shapes=True)

    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    # Keep the History object so the curves can be plotted afterwards.
    history = model.fit(
        X_train,
        Y_train,
        epochs=6,
        batch_size=200,
        verbose=1,
        validation_data=(X_test, Y_test),
    )
    model.save('./mnistRES2.h5')

    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title("model loss")
    plt.ylabel("loss")
    plt.xlabel("epoch")
    plt.legend(["train","test"],loc="upper left")
    plt.show()

    # Keras versions differ in whether the metric is logged as 'acc' or 'accuracy'.
    acc_key = 'acc' if 'acc' in history.history else 'accuracy'
    plt.plot(history.history[acc_key])
    plt.plot(history.history['val_' + acc_key])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()
    return model
def main():
    """Load MNIST, preprocess it and train the ResNet classifier."""
    (X_train, Y_train), (X_test, Y_test) = mnist.load_data()
    print(X_train.shape)

    # Add the trailing channel axis.
    X_train = X_train.reshape(-1, 28, 28, 1)
    X_test = X_test.reshape(-1, 28, 28, 1)
    print (X_train.shape)

    # Convert to float32 and scale by 1/255.
    X_train = X_train.astype('float32') / 255.0
    X_test = X_test.astype('float32') / 255.0

    # One-hot encode the labels over 10 classes, e.g. 7 -> [0,0,0,0,0,0,0,1,0,0].
    Y_test = np_utils.to_categorical(Y_test, 10)
    Y_train = np_utils.to_categorical(Y_train, 10)

    ResNet_Extractor(X_train, Y_train, X_test, Y_test)


main()
輸出:
測試:
from keras.models import load_model
import matplotlib.image as processimage
import matplotlib.pyplot as plt
import numpy as np

model = load_model('mnistRES2.h5')
image = processimage.imread('pred_image/3.jpg')
# plt.imshow(image)
# plt.show()
image_to_array = np.array(image)
# The training script scales pixels by 1/255 before fitting, so 8-bit input
# must be scaled the same way here or the model sees out-of-range values.
if image_to_array.dtype == np.uint8:
    image_to_array = image_to_array.astype('float32') / 255.0
# NOTE(review): assumes the file is a single-channel 28x28 image; an RGB file
# would be split into several "images" by this reshape. Confirm the input.
image_to_array = image_to_array.reshape(-1, 28, 28, 1)
prediction = model.predict(image_to_array)
# Index of the highest score in each prediction row.
Final_prediction = [result.argmax() for result in prediction]
print (Final_prediction,prediction)
1.5 rpn
import keras.layers as KL
from keras.models import Model
import keras.backend as K
import keras
import tensorflow as tf
from keras.utils import plot_model
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
%matplotlib inline
def building_block(filters,block):
    """Return a function that applies one bottleneck residual block.

    Args:
        filters: Channel count of the first two convolutions; the block
            outputs ``4 * filters`` channels.
        block: Index of the block inside its stage. Index 0 uses stride 2 and
            a convolutional shortcut; any other index uses stride 1 and an
            identity shortcut.

    Returns:
        A callable ``f(x)`` mapping an input tensor to the block output. The
        final activation layer is named ``'last<N>'`` with a unique ``N``.
    """
    if block != 0:
        stride = 1
    else:
        # First block of a stage: downsample by a factor of two.
        stride = 2

    def f(x):
        # Main path.
        y = KL.Conv2D(filters=filters, kernel_size=(1, 1), strides=stride)(x)
        y = KL.BatchNormalization(axis=3)(y)
        y = KL.Activation('relu')(y)

        # No stride here; padding='same' keeps the spatial size.
        y = KL.Conv2D(filters=filters, kernel_size=(3, 3), padding='same')(y)
        y = KL.BatchNormalization(axis=3)(y)
        y = KL.Activation('relu')(y)

        # Main path output.
        y = KL.Conv2D(filters=4 * filters, kernel_size=(1, 1))(y)
        y = KL.BatchNormalization(axis=3)(y)

        # Shortcut path depends on the block index.
        if block == 0:
            shortcut = KL.Conv2D(filters=4 * filters, kernel_size=(1, 1), strides=stride)(x)
            shortcut = KL.BatchNormalization(axis=3)(shortcut)
        else:
            shortcut = x

        # Add main path and shortcut.
        y = KL.Add()([y, shortcut])
        # A random suffix from a small range can repeat between blocks, and
        # Keras rejects models with duplicate layer names. get_uid hands out
        # an increasing counter per prefix, so each name is unique.
        y = KL.Activation('relu', name='last' + str(K.get_uid('last')))(y)
        return y

    return f
#resnet 主輸入函數
def ResNet_Extractor(inputs):
    """Map an input tensor to a ResNet-style feature map.

    Applies a 3x3 conv / batch-norm / ReLU stem, then three stages of two
    ``building_block`` units each, doubling the filter count after every
    stage (64, 128, 256).

    Args:
        inputs: Input tensor fed to the first convolution.

    Returns:
        The output tensor of the last residual block.
    """
    stem = KL.Conv2D(filters=64, kernel_size=(3, 3), padding='same')(inputs)
    stem = KL.BatchNormalization(axis=3)(stem)
    x = KL.Activation('relu')(stem)

    # One entry per stage: the number of blocks in that stage.
    stage_filters = 64
    for blocks_in_stage in [2, 2, 2]:
        for block_id in range(blocks_in_stage):
            x = building_block(filters=stage_filters, block=block_id)(x)
        stage_filters *= 2  # double the filter count for every stage
    return x
#share map 和 anchor提取
def RpnNet(featuremap, k=9):
    """Build the RPN heads on top of a feature map.

    Args:
        featuremap: Feature-map tensor from the backbone.
        k: Number of anchors per feature-map location.

    Returns:
        ``(rpn_classification, rpn_probability, rpn_BoundingBox)``: class
        scores reshaped to ``(batch, -1, 2)``, their softmax, and box
        outputs reshaped to ``(batch, -1, 4)``.
    """
    # Shared 3x3 convolution; padding='same' keeps the spatial size.
    shared = KL.Conv2D(filters=256, kernel_size=(3, 3), padding='same',
                       name='SSharemap')(featuremap)
    shared = KL.Activation('linear')(shared)

    # Classification head: 2 scores per anchor.
    class_map = KL.Conv2D(filters=2 * k, kernel_size=(1, 1))(shared)
    # The conv output is reshaped inside a Lambda layer: axis 0 keeps the
    # batch, -1 absorbs the anchor count (it varies with the input size),
    # and the last axis holds the two scores of each anchor.
    class_flat = KL.Lambda(
        lambda x: tf.reshape(x, [tf.shape(x)[0], -1, 2]))(class_map)
    rpn_classification = KL.Activation(
        'linear', name='rpn_classification')(class_flat)
    # The scores above, passed through a softmax.
    rpn_probability = KL.Activation(
        'softmax', name='rpn_probability')(rpn_classification)

    # Regression head: 4 values per anchor.
    box_map = KL.Conv2D(filters=4 * k, kernel_size=(1, 1))(shared)
    box_map = KL.Activation('linear')(box_map)
    rpn_BoundingBox = KL.Lambda(
        lambda x: tf.reshape(x, [tf.shape(x)[0], -1, 4]),
        name='rpn_POS')(box_map)

    return rpn_classification, rpn_probability, rpn_BoundingBox
# x = KL.Input((64,64,3))
# featureMap = ResNet_Extractor(x)
# rpn_classification,rpn_probability,rpn_BoundingBox = RpnNet(featureMap,k=9)
# model = Model(inputs = [x],outputs=[rpn_classification,rpn_probability,rpn_BoundingBox])
# model.summary()
# plot_model(model=model,to_file='siezemap test.png',show_shapes=True)
1.5.1 rpn分類loss
def RPNClassLoss(rpn_match,rpn_Cal):
    """Classification loss of the RPN.

    Args:
        rpn_match: Ground-truth anchor tensor with a trailing axis of size 1,
            which is squeezed away. Entries equal to 0 are dropped; the
            remaining entries get label 1 if they equal 1, otherwise 0.
        rpn_Cal: RPN class scores, treated as logits.

    Returns:
        The mean sparse categorical cross-entropy over the kept anchors, or
        0.0 when no anchor is kept.
    """
    match = tf.squeeze(rpn_match, axis=-1)  # drop the last axis

    # tf.where on a boolean tensor yields the coordinates of the True entries.
    kept = tf.where(K.not_equal(x=match, y=0))

    # Boolean -> int32: True becomes 1, False becomes 0.
    is_positive = K.cast(K.equal(match, 1), tf.int32)
    target = tf.gather_nd(params=is_positive, indices=kept)  # ground truth
    logits = tf.gather_nd(params=rpn_Cal, indices=kept)      # RPN output

    # The sparse variant takes integer labels, so no one-hot encoding is needed.
    per_anchor = K.sparse_categorical_crossentropy(
        target=target, output=logits, from_logits=True)

    # Average when there is at least one element, otherwise return 0.
    return K.switch(condition=tf.size(per_anchor) > 0,
                    then_expression=K.mean(per_anchor),
                    else_expression=tf.constant(0.0))
1.5.2 rpn迴歸loss
#小工具提取
def batch_pack(x,counts,num_rows):
    """Concatenate the leading ``counts[i]`` entries of each row of ``x``.

    Args:
        x: Tensor indexed as ``x[i, :n]``.
        counts: Per-row number of leading entries to keep.
        num_rows: Number of rows of ``x`` to visit.

    Returns:
        The kept slices joined along axis 0.
    """
    kept_slices = [x[row, :counts[row]] for row in range(num_rows)]
    return tf.concat(kept_slices, axis=0)
#位置loss
def RpnBBoxLoss(target_bbox,rpn_match,rpn_bbox,batch_size=10):
    """Smooth-L1 bounding-box loss of the RPN over positive anchors.

    Args:
        target_bbox: Target box values; for image ``i`` only the first
            ``n_i`` rows are used, where ``n_i`` is its positive anchor count.
        rpn_match: Anchor match tensor with a trailing axis of size 1; an
            entry equal to 1 marks a positive anchor.
        rpn_bbox: Box values predicted by the RPN.
        batch_size: Number of images in the batch. Defaults to 10, the value
            that was previously hard-coded.

    Returns:
        The mean smooth-L1 loss, or 0.0 when there is no positive anchor.
    """
    rpn_match = tf.squeeze(input=rpn_match, axis=-1)
    is_positive = K.equal(x=rpn_match, y=1)

    # Predictions at the positive anchor positions.
    indice = tf.where(is_positive)
    rpn_bbox = tf.gather_nd(params=rpn_bbox, indices=indice)

    # Positive anchors per image, used to trim the targets to match.
    batch_counts = K.sum(K.cast(is_positive, tf.int32), axis=-1)
    target_bbox = batch_pack(x=target_bbox, counts=batch_counts, num_rows=batch_size)

    # Smooth L1: quadratic below 1, linear above.
    diff = K.abs(target_bbox - rpn_bbox)
    less_than_one = K.cast(K.less(x=diff, y=1.0), tf.float32)
    loss = less_than_one * 0.5 * diff**2 + (1 - less_than_one) * (diff - 0.5)
    loss = K.switch(condition=tf.size(loss) > 0,
                    then_expression=K.mean(loss),
                    else_expression=tf.constant(0.0))
    return loss
# Define the model inputs.
input_image = KL.Input(shape=[64,64,3],dtype=tf.float32)
input_bbox = KL.Input(shape=[None,4],dtype=tf.float32)
input_class_ids = KL.Input(shape = [None],dtype=tf.int32) # map {'dog':0,'cat':1}
input_rpn_match = KL.Input(shape=[None,1],dtype=tf.int32)
input_rpn_bbox = KL.Input(shape=[None,4],dtype=tf.float32)
# Wire inputs to outputs: backbone -> RPN heads -> loss layers.
feature_map = ResNet_Extractor(input_image)
rpn_classification,rpn_probability,rpn_BoundingBox = RpnNet(feature_map,k=9)
# The losses are computed inside Lambda layers so they become model outputs.
loss_rpn_class = KL.Lambda(lambda x:RPNClassLoss(*x),name='classloss')([input_rpn_match,rpn_classification])
loss_rpn_bbox = KL.Lambda(lambda x:RpnBBoxLoss(*x),name='bboxloss')([input_rpn_bbox,input_rpn_match,rpn_BoundingBox])
# NOTE(review): input_bbox and input_class_ids are listed as inputs but are not
# used by any layer above - confirm this is intended.
model = Model(inputs=[input_image,input_bbox,input_class_ids,input_rpn_match,input_rpn_bbox],
outputs = [rpn_classification,rpn_probability,rpn_BoundingBox,loss_rpn_class,loss_rpn_bbox] )
# Register the two loss-layer outputs as the model's losses.
loss_layer1 = model.get_layer('classloss').output
loss_layer2 = model.get_layer('bboxloss').output
model.add_loss(tf.reduce_mean(loss_layer1))
model.add_loss(tf.reduce_mean(loss_layer2))
# No per-output loss: training is driven only by the add_loss terms above.
model.compile(loss=[None]*len(model.outputs),
optimizer=keras.optimizers.SGD(lr=0.00003))
model.summary()
2.kaggle貓狗分類
2.1 數據準備
貓狗圖下載:https://www.kaggle.com/c/dogs-vs-cats/data ,原始數據集包含25,000張貓狗圖(每個類別12,500個)大小爲543MB(壓縮後)。Keras己內建好的預訓練模型進行圖像分類, 包括:VGG16
,VGG19
,ResNet50
,InceptionV3
,InceptionResNetV2
,Xception
,MobileNet
from keras.applications import VGG16
# Instantiate a VGG16 model.
conv_base = VGG16(weights='imagenet',
include_top=False, # keep only the convolutional base, without the dense classifier on top
input_shape=(150, 150, 3)) # image size and number of colour channels to process
向構造函數傳遞了三個參數:
1.weights
, 指定從哪個權重檢查點初始化模型
2.include_top
, 指定模型最後是否包含密集連接分類器。默認情況下,這個密集連接分類器對應於ImageNet的1000個類別。因爲我們打算使用自己的分類器(只有兩個類別:cat和dog),所以不用包含。
3.input_shape
, 輸入到網絡中的圖像張量(可選參數),如果不傳入這個參數,那麼網絡可以處理任意形狀的輸入
以下是VGG16“卷積基底conv_base”
架構細節:
conv_base.summary() # print the model summary
最後這個特徵圖形狀爲(4, 4, 512),這個特徵上面添加一個密集連接分類器。
2.2 不用數據增強快速特徵提取(計算代價低)
運行ImageDataGenerator實例,將圖像及其標籤提取爲Numpy數組,調用conv_base模型的predict方法
從這些圖像的中提取特徵。
#特徵提取
import os
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
base_dir = 'data/cats_and_dogs_small'
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')
test_dir = os.path.join(base_dir, 'test')
datagen = ImageDataGenerator(rescale=1./255) # image data generator that rescales pixel values by 1/255
batch_size = 20 # number of images produced per generator batch
# 提取圖像特徵
def extract_features(directory, sample_count):
    """Run images from ``directory`` through ``conv_base`` and collect the output.

    Args:
        directory: Image directory handed to ``datagen.flow_from_directory``.
        sample_count: Number of images to process.

    Returns:
        ``(features, labels)``: arrays of shape ``(sample_count, 4, 4, 512)``
        and ``(sample_count,)``.
    """
    # (4, 4, 512) matches the output of the last layer of the VGG16 base.
    features = np.zeros(shape=(sample_count, 4, 4, 512))
    labels = np.zeros(shape=(sample_count))

    # Each iteration of the generator yields one batch of images and labels.
    generator = datagen.flow_from_directory(
        directory,
        target_size=(150, 150),   # image height and width
        batch_size=batch_size,
        class_mode='binary')      # two classes only (cat & dog)

    # Track the number of filled rows rather than a batch index, so a short
    # batch or a sample_count that is not a multiple of batch_size cannot
    # overrun the arrays or leave a gap.
    filled = 0
    if sample_count > 0:
        for inputs_batch, labels_batch in generator:
            take = min(len(inputs_batch), sample_count - filled)
            features_batch = conv_base.predict(inputs_batch)
            features[filled:filled + take] = features_batch[:take]
            labels[filled:filled + take] = labels_batch[:take]
            filled += take
            # The generator loops forever, so stop once enough images are seen.
            if filled >= sample_count:
                break
    print('extract_features complete!')
    return features, labels
train_features, train_labels = extract_features(train_dir, 2000) # features of the training images
validation_features, validation_labels = extract_features(validation_dir, 1000) # features of the validation images
test_features, test_labels = extract_features(test_dir, 1000) # features of the test images
2.2.1 flatten後接分類器
提取的特徵當前是(樣本數,4,4,512)的形狀。我們將它們餵給一個密集連接(densely-connected)的分類器,所以首先我們必須把它們壓扁(flatten)成(樣本數, 8192):
# Flatten each (4, 4, 512) feature map into one vector per sample.
train_features = train_features.reshape((2000, 4 * 4 * 512))
validation_features = validation_features.reshape((1000, 4 * 4 * 512))
test_features = test_features.reshape((1000, 4 * 4 * 512))
下面定義一個密集連接分類器,並在剛剛保存好的數據和標籤上訓練分類器:
from keras import models
from keras import layers
from keras import optimizers

# A new densely connected classifier, declared as a list of layers.
model = models.Sequential([
    layers.Dense(256, activation='relu', input_dim=4 * 4 * 512),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid'),  # single output: two classes (cat & dog)
])

model.compile(
    optimizer=optimizers.RMSprop(lr=2e-5),
    loss='binary_crossentropy',
    metrics=['acc'],
)

# Train on the features extracted by the pretrained convolutional base.
history = model.fit(
    train_features,
    train_labels,
    epochs=30,
    batch_size=20,
    validation_data=(validation_features, validation_labels),
)
訓練速度快,只需要處理兩個Dense層。看一下訓練過程中的損失和精度曲線:
import matplotlib.pyplot as plt

acc, val_acc = history.history['acc'], history.history['val_acc']
loss, val_loss = history.history['loss'], history.history['val_loss']
epochs = range(len(acc))

# One figure per metric: training curve as dots, validation curve as a line.
curves = (
    (acc, val_acc, 'Training acc', 'Validation acc',
     'Training and validation accuracy'),
    (loss, val_loss, 'Training loss', 'Validation loss',
     'Training and validation loss'),
)
for index, (train_curve, val_curve, train_label, val_label, title) in enumerate(curves):
    if index:
        plt.figure()  # open a new figure for every metric after the first
    plt.plot(epochs, train_curve, 'bo', label=train_label)
    plt.plot(epochs, val_curve, 'b', label=val_label)
    plt.title(title)
    plt.legend()
plt.show()
從圖中可以看出,驗證精度達到了約90%,比之前從一開始就訓練小型模型效果要好很多,但是從圖中也可以看出,雖然dropout比率比較大,但模型從一開始就出現了過擬合
。這是因爲本方法沒有使用數據增強
,而數據增強對防止小型圖片數據集過擬合非常重要。
2.3 用數據增強的特徵提取(計算代價高)
這種方法速度更慢,計算代價更高,但是可以在訓練期間使用數據增強。這種方法是:擴展conv_base模型,然後在輸入數據上端到端的運行模型(這種方法計算代價很高,必須在GPU上運行)
from keras import models
from keras import layers

# New network: pretrained convolutional base followed by a dense classifier.
model = models.Sequential([
    conv_base,                              # pretrained convolutional base
    layers.Flatten(),                       # flatten the feature maps
    layers.Dense(256, activation='relu'),   # new dense classifier
    layers.Dense(1, activation='sigmoid'),  # single output: two classes (cat & dog)
])
model.summary()
VGG16的“卷積基底”
有14,714,688個參數,非常大。上面添加的分類器有200萬個參數。在編譯和訓練模型之前,需要凍結卷積基
。凍結一個或多個層是指在訓練過程中保持其權重不變(如果不這麼做,那麼卷積基
之前學到的表示
將會在訓練
過程中被修改
)。因爲其上添加的Dense是隨機初始化的,所以非常大的權重更新會在網絡中進行傳播
,對之前學到的表示
造成很大破壞。在Keras中,凍結網絡的方法是將其trainable屬性設置爲False:
# 看一下“凍結前”有多少可以被訓練的權重
print('This is the number of trainable weights '
'before freezing the conv base:', len(model.trainable_weights))
# “凍結”卷積基底
conv_base.trainable = False
# 再看一下“凍結後”有多少可以被訓練的權重
print('This is the number of trainable weights '
'after freezing the conv base:', len(model.trainable_weights))
2.3.1 凍結後用數據增強訓練
如此設置之後,只有添加的兩個Dense層的權重纔會被訓練,總共有4個權重張量,每層2個(主權重矩陣和偏置向量),注意的是,如果想修改權重屬性trainable,那麼應該修改好屬性之後再編譯模型。下面,我們可以訓練模型了,並使用數據增強的辦法:
from keras.preprocessing.image import ImageDataGenerator

# Training images are rescaled and randomly augmented.
augmentation = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation)

# Note: validation and test data are only rescaled, never augmented.
test_datagen = ImageDataGenerator(rescale=1./255)

# Both generators share image size (height, width), batch size and the
# binary class mode (two classes: cat & dog).
flow_options = dict(target_size=(150, 150), batch_size=20, class_mode='binary')
train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)
validation_generator = test_datagen.flow_from_directory(validation_dir, **flow_options)

model.compile(
    loss='binary_crossentropy',
    optimizer=optimizers.RMSprop(lr=2e-5),
    metrics=['acc'],
)

history = model.fit_generator(
    train_generator,
    steps_per_epoch=100,
    epochs=30,
    validation_data=validation_generator,
    validation_steps=50,
    verbose=2,
)

model.save('cats_and_dogs_small_3.h5')  # store the model on disk
acc, val_acc = history.history['acc'], history.history['val_acc']
loss, val_loss = history.history['loss'], history.history['val_loss']
epochs = range(len(acc))

# One figure per metric: training curve as dots, validation curve as a line.
curves = (
    (acc, val_acc, 'Training acc', 'Validation acc',
     'Training and validation accuracy'),
    (loss, val_loss, 'Training loss', 'Validation loss',
     'Training and validation loss'),
)
for index, (train_curve, val_curve, train_label, val_label, title) in enumerate(curves):
    if index:
        plt.figure()  # open a new figure for every metric after the first
    plt.plot(epochs, train_curve, 'bo', label=train_label)
    plt.plot(epochs, val_curve, 'b', label=val_label)
    plt.title(title)
    plt.legend()
plt.show()
驗證精度到了將近96%,而且減少了過擬合(在訓練集上好,驗證測試集上差)
2.4 微調模型
以上 2.2 和 2.3 兩種方法都屬於特徵提取
,下面使用模型微調
進一步提高模型性能,步驟如下:
(1)在已經訓練好的基網絡(base network)上添加自定義網絡
(2)凍結基網絡
(3)訓練所添加的部分
(4)解凍基網絡
的一些層
(5)聯合訓練解凍的這些層和添加的部分
在做特徵提取
的時候已經完成了前三個步驟。我們繼續第四個步驟,先解凍conv_base,然後凍結其中的部分層。
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) (None, 150, 150, 3) 0
_________________________________________________________________
block1_conv1 (Conv2D) (None, 150, 150, 64) 1792
_________________________________________________________________
block1_conv2 (Conv2D) (None, 150, 150, 64) 36928
_________________________________________________________________
block1_pool (MaxPooling2D) (None, 75, 75, 64) 0
_________________________________________________________________
block2_conv1 (Conv2D) (None, 75, 75, 128) 73856
_________________________________________________________________
block2_conv2 (Conv2D) (None, 75, 75, 128) 147584
_________________________________________________________________
block2_pool (MaxPooling2D) (None, 37, 37, 128) 0
_________________________________________________________________
block3_conv1 (Conv2D) (None, 37, 37, 256) 295168
_________________________________________________________________
block3_conv2 (Conv2D) (None, 37, 37, 256) 590080
_________________________________________________________________
block3_conv3 (Conv2D) (None, 37, 37, 256) 590080
_________________________________________________________________
block3_pool (MaxPooling2D) (None, 18, 18, 256) 0
_________________________________________________________________
block4_conv1 (Conv2D) (None, 18, 18, 512) 1180160
_________________________________________________________________
block4_conv2 (Conv2D) (None, 18, 18, 512) 2359808
_________________________________________________________________
block4_conv3 (Conv2D) (None, 18, 18, 512) 2359808
_________________________________________________________________
block4_pool (MaxPooling2D) (None, 9, 9, 512) 0
_________________________________________________________________
block5_conv1 (Conv2D) (None, 9, 9, 512) 2359808
_________________________________________________________________
block5_conv2 (Conv2D) (None, 9, 9, 512) 2359808
_________________________________________________________________
block5_conv3 (Conv2D) (None, 9, 9, 512) 2359808
_________________________________________________________________
block5_pool (MaxPooling2D) (None, 4, 4, 512) 0
=================================================================
Total params: 14,714,688
Trainable params: 14,714,688
Non-trainable params: 0
回顧這些層,我們將微調最後三個卷積層
,直到block4_pool之前所有層都應該被凍結,後面三層來進行訓練。爲什麼不調整更多層? 爲什麼不調整整個“卷積基底”? 我們可以,但是我們需要考慮:
1.
“卷積基底”較前面的神經層所學習到的特徵表示更加通用(generic)
,更具有可重複使用的特徵,而較高層次的特徵表示則聚焦獨特的特徵
。微調這些聚焦獨特的特徵的神經層則更爲有用。
2.
我們訓練
的參數越多,我們越有可能的過擬合(overfitting)。VGG16的“卷積基底”具有1千5百萬的參數,因此嘗試在小數據集上進行訓練是有風險的。
conv_base.trainable = True  # unfreeze the convolutional base

# Despite its name, this list holds the layers that stay trainable: every
# other layer (everything up to block4_pool) is frozen.
layers_frozen = ['block5_conv1','block5_conv2', 'block5_conv3', 'block5_pool']

for layer in conv_base.layers:
    layer.trainable = layer.name in layers_frozen
    # Print the "trainable" flag of every layer.
    print("{}: {}".format(layer.name, layer.trainable))
2.4.1 微調並保存模型
現在可微調網絡了,我們將使用學習率非常小的RMSProp優化器來實現。之所以讓學習率很小,是因爲對於微調網絡的三層表示,我們希望其變化範圍不要太大,太大的權重可能會破壞這些表示。
# Recompile with a small learning rate for fine-tuning.
fine_tune_optimizer = optimizers.RMSprop(lr=1e-5)
model.compile(
    loss='binary_crossentropy',
    optimizer=fine_tune_optimizer,
    metrics=['acc'],
)

history = model.fit_generator(
    train_generator,
    steps_per_epoch=100,
    epochs=100,
    validation_data=validation_generator,
    validation_steps=50,
)
model.save('cats_and_dogs_small_4.h5')

acc, val_acc = history.history['acc'], history.history['val_acc']
loss, val_loss = history.history['loss'], history.history['val_loss']
epochs = range(len(acc))

# One figure per metric: training curve as dots, validation curve as a line.
curves = (
    (acc, val_acc, 'Training acc', 'Validation acc',
     'Training and validation accuracy'),
    (loss, val_loss, 'Training loss', 'Validation loss',
     'Training and validation loss'),
)
for index, (train_curve, val_curve, train_label, val_label, title) in enumerate(curves):
    if index:
        plt.figure()  # open a new figure for every metric after the first
    plt.plot(epochs, train_curve, 'bo', label=train_label)
    plt.plot(epochs, val_curve, 'b', label=val_label)
    plt.title(title)
    plt.legend()
plt.show()
這些曲線看起來包含噪音。爲了讓圖像更具有可讀性,可以讓每個損失和精度替換爲指數移動平均,從而讓曲線變得更加平滑,下面用一個簡單實用函數來實現:
def smooth_curve(points, factor=0.8):
    """Return the exponential moving average of ``points``.

    Args:
        points: Iterable of numbers to smooth.
        factor: Weight given to the previous smoothed value; the new point
            gets weight ``1 - factor``.

    Returns:
        A list with one smoothed value per input point. The first value is
        the first point unchanged; an empty input gives an empty list.
    """
    smoothed_points = []
    for point in points:
        if not smoothed_points:
            # Nothing to average with yet: seed with the first point.
            smoothed_points.append(point)
            continue
        previous = smoothed_points[-1]
        smoothed_points.append(previous * factor + point * (1 - factor))
    return smoothed_points
# Same two figures as before, with every curve smoothed first.
smoothed_curves = (
    (acc, val_acc, 'Smoothed training acc', 'Smoothed validation acc',
     'Training and validation accuracy'),
    (loss, val_loss, 'Smoothed training loss', 'Smoothed validation loss',
     'Training and validation loss'),
)
for index, (train_curve, val_curve, train_label, val_label, title) in enumerate(smoothed_curves):
    if index:
        plt.figure()  # open a new figure for every metric after the first
    plt.plot(epochs, smooth_curve(train_curve), 'bo', label=train_label)
    plt.plot(epochs, smooth_curve(val_curve), 'b', label=val_label)
    plt.title(title)
    plt.legend()
plt.show()
通過指數移動平均,驗證曲線變得更清楚了。精度提高了1%,約從96%提高到了97%。
在測試數據上最終評估這個模型:
# Evaluate the fine-tuned model on the test images (rescaled only).
test_flow_options = dict(target_size=(150, 150), batch_size=20, class_mode='binary')
test_generator = test_datagen.flow_from_directory(test_dir, **test_flow_options)

test_metrics = model.evaluate_generator(test_generator, steps=50)
test_loss, test_acc = test_metrics
print('test acc:', test_acc)
得到了差不多97%的測試精度,在關於這個數據集的原始Kaggle競賽中,這個結果是最佳結果之一。我們只是用了一小部分訓練數據(約10%)就得到了這個結果。訓練20000個樣本和訓練2000個樣本還是有很大差別的。
3.Caffe_SSD三字碼識別
3.1 check List
1.
檢查 CUDA nvcc -V環境是否安裝正常 如果不正常則去安裝 NVIDIA ,CUDA ,CUDNN (版本搭配)
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2019 NVIDIA Corporation
Built on Sun_Jul_28_19:07:16_PDT_2019
Cuda compilation tools, release 10.1, V10.1.243
由於Python2 即將落幕 所以我們這次在Ubuntu18.04 自帶的 python3.6上進行
2.
sudo ldconfig 檢查是否有軟連接沒有生效
3.2 正式安裝
1.
依賴解決:
sudo apt-get install libprotobuf-dev libleveldb-dev libsnappy-dev libopencv-dev libhdf5-serial-dev protobuf-compiler
sudo apt-get install --no-install-recommends libboost-all-dev
sudo apt-get install libopenblas-dev liblapack-dev libatlas-base-dev
sudo apt-get install libgflags-dev libgoogle-glog-dev liblmdb-dev
sudo apt-get install git cmake build-essential
從源代碼編譯Opencv,進入官網 : http://opencv.org/releases.html , 下載 3.x系列 解壓到你要安裝的位置,命令行進入已解壓的文件夾
mkdir build # 創建編譯的文件目錄
cd build
cmake -D CMAKE_BUILD_TYPE=Release -D CMAKE_INSTALL_PREFIX=/usr/local ..
make -j8 #編譯 注意自己的核數
在執行 make -j8 命令編譯到 92% 時可能會出現以下錯誤,是由於 opencv3.1 與 cuda8.0 不兼容導致的。解決辦法:修改 /opencv-3.1.0/modules/cudalegacy/src/graphcuts.cpp 文件內容,如圖:
編譯成功後安裝:sudo make install
安裝完成後通過查看 opencv 版本驗證是否安裝成功:pkg-config --modversion opencv
2.
安裝caffe-SSD
git clone https://github.com/weiliu89/caffe.git
cd caffe
git checkout ssd
再次檢查依賴
sudo apt-get install libprotobuf-dev libleveldb-dev libsnappy-dev libopencv-dev libhdf5-serial-dev protobuf-compiler
sudo apt-get install --no-install-recommends libboost-all-dev
sudo apt-get install libatlas-base-dev python-dev
sudo apt-get install libgflags-dev libgoogle-glog-dev liblmdb-dev
3.
修改Makefile.config文件,複製一份的原因是編譯 caffe 時需要的是 Makefile.config 文件,而Makefile.config.example 只是caffe 給出的配置文件例子,不能用來編譯 caffe。
cp Makefile.config.example Makefile.config
sudo vim Makefile.config
應用 cudnn
將第5行的 # 取消
#USE_CUDNN := 1
修改成:
USE_CUDNN := 1
應用 opencv 版本
將第21行的 # 取消
#OPENCV_VERSION := 3
修改爲:
OPENCV_VERSION := 3
使用 python 接口
將第89行的 # 取消
#WITH_PYTHON_LAYER := 1
修改爲
WITH_PYTHON_LAYER := 1
修改 python 路徑
將 92/93行的 代碼修改如下
INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include
LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib
修改爲:
INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include /usr/include/hdf5/serial
LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib /usr/lib/x86_64-linux-gnu /usr/lib/x86_64-linux-gnu/hdf5/serial
註釋Python2 切換python3
代碼修改如下 否則會編譯Python.h and numpy/arrayobject.h. 出錯
將 67/68 行的以下內容(實際行數可能稍有出入)
PYTHON_INCLUDE := /usr/include/python2.7 \
/usr/lib/python2.7/dist-packages/numpy/core/include
修改爲:
# PYTHON_INCLUDE := /usr/include/python2.7 \
# /usr/lib/python2.7/dist-packages/numpy/core/include
將 77/78 行的註釋解除,並更新爲 Python3.6(實際行數可能稍有出入)
# PYTHON_LIBRARIES := boost_python3 python3.5m
# PYTHON_INCLUDE := /usr/include/python3.5m \
# /usr/lib/python3.5/dist-packages/numpy/core/include
PYTHON_LIBRARIES := boost_python3 python3.6m
PYTHON_INCLUDE := /usr/include/python3.6m \
/usr/lib/python3.6/dist-packages/numpy/core/include
如果最後提示不支持compute_20,就把這句刪掉,最後效果是
nvcc fatal : Unsupported gpu architecture ‘compute_20’
Makefile:588: recipe for target ‘.build_release/cuda/src/caffe/solvers/sgd_solver.o’ failed
make: *** [.build_release/cuda/src/caffe/solvers/sgd_solver.o] Error 1
make: *** Waiting for unfinished jobs…
建議顯卡直接改成如下:
# Each continued line must end with a backslash, otherwise make treats the
# following "-gencode ..." lines as separate (invalid) statements.
CUDA_ARCH := -gencode arch=compute_30,code=sm_30 \
		-gencode arch=compute_35,code=sm_35 \
		-gencode arch=compute_50,code=sm_50 \
		-gencode arch=compute_52,code=sm_52 \
		-gencode arch=compute_60,code=sm_60 \
		-gencode arch=compute_61,code=sm_61 \
		-gencode arch=compute_61,code=compute_61
然後修改caffe 目錄下的 Makefile 文件: 注意不是Makefile.config文件
sudo vim Makefile
將第409行的:
NVCCFLAGS +=-ccbin=$(CXX) -Xcompiler-fPIC $(COMMON_FLAGS)
替換爲:
NVCCFLAGS += -D_FORCE_INLINES -ccbin=$(CXX) -Xcompiler -fPIC $(COMMON_FLAGS)
將第181行的:
LIBRARIES += glog gflags protobuf boost_system boost_filesystem m hdf5_hl hdf5
改爲:
LIBRARIES += glog gflags protobuf boost_system boost_filesystem m hdf5_serial_hl hdf5_serial
將第265行替換如下(【可選項】我的環境太新,出了這個問題;只有當你出現了以下錯誤時才需要修改:
.build_release/lib/libcaffe.so: undefined reference to `boost::re_detail_106501::put_mem_block(void*)')
LIBRARIES += boost_thread stdc++
改爲:
LIBRARIES += boost_thread stdc++ boost_regex
4.
下載python 環境依賴包 :
去到caffe根目錄中的python目錄中運行 …/caffe/python
5.
設置系統python環境 在末尾添加環境變量
vim ~/.bashrc
export PYTHONPATH="/opt/build/caffe/python" # 此處爲caffe 的rootdir 目錄
source ~/.bashrc
6.
編譯caffe保存 開始編譯,在 caffe 目錄下執行 如果出錯 建議修改完畢使用 make clean 繼續Try
make all -j32 代表幾核並行編譯 請與自己電腦量力而行 後續將不再重複聲明哇
make test -j32
make pycaffe
make runtest -j32
若編譯 Caffe 時出現 “/usr/bin/ld: cannot find -lopenblas” 錯誤:
即使已經克隆了 OpenBLAS,仍需安裝以下基礎包,它們會在 Ubuntu 14.04 和 16.04 上鏈接相應的庫。
apt install liblapack-dev liblapack3 libopenblas-base libopenblas-dev
apt install liblapack-dev liblapack3 libopenblas-base libopenblas-dev
到此 安裝caffe 結束
7.
更改caffe 源碼: https://blog.csdn.net/sinat_14916279/article/details/56489601
安裝參考:https://blog.csdn.net/lukaslong/article/details/81390276