文章目錄

1.複寫keras_faster rcnn

1.1 解析xml信息

import xml.etree.ElementTree as ET

def parse_label(xml_file):
    """Parse one annotation XML file into image info, categories and boxes.

    Args:
        xml_file: File name or file object accepted by ``ET.parse``.

    Returns:
        A tuple ``(image_name, (width, height), category, ground_truth_box)``.
        ``category`` holds one class name per ``<object>`` element and
        ``ground_truth_box`` holds the matching ``[xmin, ymin, xmax, ymax]``
        list at the same index. Every value is the raw element text (a
        string), exactly as stored in the XML.
    """
    root = ET.parse(xml_file).getroot()

    # These three values occur once per image.
    size = root.find('size')
    width = size.find('width').text
    height = size.find('height').text
    image_name = root.find('filename').text

    category = []
    ground_truth_box = []
    for obj in root.findall('object'):  # an image may contain several objects
        # Read the object's own direct <name> child only. Walking every
        # descendant <name> (for example names nested inside <part>) would
        # add extra categories, each paired with a copy of the same box.
        name = obj.find('name')
        if name is None:
            continue  # an object without a name contributes nothing
        bndbox = obj.find('bndbox')
        category.append(name.text)
        ground_truth_box.append(
            [bndbox.find(tag).text for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
        )
    return image_name, (width, height), category, ground_truth_box

import glob   # 讀文件夾也可以：for i in os.dir    glob可以帶路徑輸出
# Parse every entry matched in the annotation folder and print each result
# followed by a newline string.
for xml_path in glob.glob('/Users/mikegao/Desktop/Annotation/*'):
    parsed = parse_label(xml_path)
    print (parsed,'\n')

或：

import pandas as pd
# Build ten sample records: "age" runs over 10..19 and "name" is age + 1.
list_table = [{'name': age + 1, "age": age} for age in range(10, 20)]
print (list_table)

# Build the frame from list_table with the columns ordered age, name,
# then append it (header row included, no index column) to 321.csv.
data_frame = pd.DataFrame(list_table, columns=['age', 'name'])
data_frame.to_csv('321.csv', mode='a', header=True, index=False)

輸出的 list_table：

輸出的 data_frame：

1.2 Anchor生成

下圖box[0]爲x，box[1]爲y

結果爲：（9，256），所以有256個centerx(中心點)，每個裏面有9個框，256*9=2304

1.3 多輸出多輸入(函數式)

1.4 resnet

下圖stage1裏conv和maxpool都會做一次下采樣

import keras.layers as KL
from keras.models import Model
import keras.backend as K
import tensorflow as tf
from keras.utils import np_utils
from  keras.utils.vis_utils import plot_model
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import os
%matplotlib inline
from keras.datasets import mnist

# Build the ResNet network structure
def building_block(filters,block): # filters grows with the stage, so it is not hard-coded here
    """Create a callable that applies one bottleneck residual block.

    Args:
        filters: Filter count of the first two convolutions; the last
            convolution and the convolutional shortcut use ``4 * filters``.
        block: Index of the block inside its stage. Index 0 uses stride 2
            and a convolutional shortcut; any other index uses stride 1 and
            passes the input through unchanged as the shortcut.

    Returns:
        A function taking a tensor ``x`` and returning the block output.
    """
    is_first_block = block == 0
    # Only the first block of a stage downsamples by 2; the others keep stride 1
    stride = 2 if is_first_block else 1

    def apply_block(x):
        # Main path, step 1: strided 1x1 convolution.
        # NOTE: the original author's remark says a (3,3) kernel was used
        # here at first, but the 28*28 images were convolved away.
        main = KL.Conv2D(filters=filters,kernel_size=(1,1),strides=stride)(x)
        main = KL.BatchNormalization(axis=3)(main)  # axis=3 is the last axis of a 4-D tensor
        main = KL.Activation('relu')(main)

        # Main path, step 2: un-strided 3x3 convolution; padding='same' keeps the size
        main = KL.Conv2D(filters=filters, kernel_size=(3, 3), padding='same')(main)
        main = KL.BatchNormalization(axis=3)(main)
        main = KL.Activation('relu')(main)

        # Main path, step 3: 1x1 convolution with 4*filters output channels
        main = KL.Conv2D(filters=4*filters,kernel_size=(1,1))(main)
        main = KL.BatchNormalization(axis=3)(main)

        # Shortcut path: its form depends on which block of the stage this is
        if is_first_block:
            shortcut = KL.Conv2D(filters=4*filters,kernel_size=(1,1),strides=stride)(x)
            shortcut = KL.BatchNormalization()(shortcut)
        else:
            # Later blocks connect the input tensor directly
            shortcut = x

        # Add the main path and the shortcut, then apply the final ReLU
        merged = KL.Add()([main,shortcut])
        return KL.Activation('relu')(merged)
    return apply_block

# ResNet feature extraction plus a 10-way classification tail
def ResNet_Extractor(X_train, Y_train,X_test,Y_test):
    """Build, train, save and plot a small ResNet classifier.

    Args:
        X_train, Y_train: Training inputs and targets passed to ``model.fit``.
        X_test, Y_test: Inputs and targets passed as ``validation_data``.

    Returns:
        The trained Keras ``Model``. As side effects the model diagram is
        written to ``resMnist6-9-2.png``, the model is saved to
        ``./mnistRES2.h5`` and the loss/accuracy curves are shown.
    """
    # Head (customize your top input): 28x28 single-channel input
    inputs = KL.Input([28,28,1])
    x = KL.Conv2D(filters=64,kernel_size=(3,3),padding='same')(inputs)
    x = KL.BatchNormalization(axis=3)(x)
    x = KL.Activation('relu')(x)

    # Body: stacks of building_block. Each entry of `block` is the number
    # of blocks in one stage; the block index restarts at 0 in every stage.
    filters = 64
    block = [2,2] # stage 0 has 2 blocks, stage 1 also has 2 blocks
    for i,block_num in enumerate(block):
        print ('---stage--', str(i) ,'---')
        for block_id in range(block_num):
            print('---block--', str(block_id) ,'---')
            x = building_block(filters=filters,block=block_id)(x)
        filters *= 2 # double the filter count for every stage
    # At this point x is the plain feature map, without the tail below

    # Tail: pooling, flatten and a 10-way softmax output
    x = KL.AveragePooling2D(pool_size=(2, 2))(x)
    x = KL.Flatten()(x)
    x = KL.Dense(units=10, activation='softmax')(x)

    # Define inputs and outputs
    model = Model(inputs=inputs, outputs=x)
    # summary() prints the table itself; wrapping it in print() only adds "None"
    model.summary()
    plot_model(model,to_file='resMnist6-9-2.png',show_shapes=True)
    # Compile the network
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    history = model.fit(  # keep the return value so the curves can be plotted
        X_train,
        Y_train,
        epochs=6,
        batch_size=200,
        verbose=1,
        validation_data = (X_test,Y_test),
    )
    model.save('./mnistRES2.h5')

    # Plot the loss curves
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title("model loss")
    plt.ylabel("loss")
    plt.xlabel("epoch")
    plt.legend(["train","test"],loc="upper left")
    plt.show()

    # The accuracy entry is recorded as 'acc' by some Keras versions and as
    # 'accuracy' by others; use whichever key this run produced.
    acc_key = 'acc' if 'acc' in history.history else 'accuracy'
    plt.plot(history.history[acc_key])
    plt.plot(history.history['val_' + acc_key])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()
    return model
    
def main():
    """Load MNIST, preprocess it and train the ResNet classifier."""
    (X_train,Y_train),(X_test,Y_test) = mnist.load_data()
    print(X_train.shape)

    # Reshape both image sets to (-1, 28, 28, 1)
    X_train = X_train.reshape(-1,28,28,1)
    X_test = X_test.reshape(-1,28,28,1)
    print (X_train.shape)

    # Convert to float32 and divide the pixel values by 255
    X_train = X_train.astype('float32') / 255.0
    X_test = X_test.astype('float32') / 255.0

    # One-hot class vectors over 10 label classes, e.g. 7 -> [0,0,0,0,0,0,0,1,0,0]
    num_classes = 10
    Y_test = np_utils.to_categorical(Y_test, num_classes)
    Y_train = np_utils.to_categorical(Y_train, num_classes)

    ResNet_Extractor(X_train, Y_train,X_test,Y_test)

main()

輸出：

測試：

from keras.models import load_model
import matplotlib.image as processimage
import matplotlib.pyplot as plt
import numpy as np

# Load the saved model and classify one image file
model = load_model('mnistRES2.h5')

image = processimage.imread('pred_image/3.jpg')
# plt.imshow(image)
# plt.show()
image_to_array = np.array(image)
if image_to_array.dtype == np.uint8:
    # Training divided the pixel values by 255, so raw 0..255 bytes must be
    # scaled the same way before prediction.
    image_to_array = image_to_array.astype('float32') / 255.0
else:
    image_to_array = image_to_array.astype('float32')
if image_to_array.ndim == 3:
    # A colour image has a trailing channel axis; average the colour channels
    # into one so the reshape below yields one sample, not one per channel.
    image_to_array = image_to_array[..., :3].mean(axis=-1)
# NOTE(review): assumes the image is 28x28 pixels - confirm for new inputs
image_to_array = image_to_array.reshape(-1,28,28,1)
prediction = model.predict(image_to_array)
# Index of the highest score in each prediction row
Final_prediction = [result.argmax() for result in prediction]
print (Final_prediction,prediction)

1.5 rpn

import keras.layers as KL
from keras.models import Model
import keras.backend as K
import keras
import tensorflow as tf
from keras.utils import plot_model
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
%matplotlib inline

def building_block(filters,block):
    """Create a callable that applies one bottleneck residual block.

    Args:
        filters: Filter count of the first two convolutions; the last
            convolution and the convolutional shortcut use ``4 * filters``.
        block: Index of the block inside its stage. Index 0 uses stride 2
            and a convolutional shortcut; any other index uses stride 1 and
            an identity shortcut.

    Returns:
        A function taking a tensor ``x`` and returning the block output.
        The final activation layer is named ``'last<N>'`` with a unique N.
    """
    # The first block of a stage (block == 0) downsamples by 2
    if block != 0:
        stride = 1
    else:
        stride = 2

    def f(x):
        # Main path
        y = KL.Conv2D(filters=filters,kernel_size=(1,1),strides=stride)(x)
        y = KL.BatchNormalization(axis=3)(y)
        y = KL.Activation('relu')(y)

        # No stride here; padding='same' keeps the spatial size unchanged
        y = KL.Conv2D(filters=filters, kernel_size=(3, 3), padding='same')(y)
        y = KL.BatchNormalization(axis=3)(y)
        y = KL.Activation('relu')(y)

        # Main path output
        y = KL.Conv2D(filters=4*filters,kernel_size=(1,1))(y)
        y = KL.BatchNormalization(axis=3)(y)

        # Shortcut path: its form depends on which block of the stage this is
        if block == 0 :
            shortcut = KL.Conv2D(filters=4*filters,kernel_size=(1,1),strides=stride)(x)
            shortcut = KL.BatchNormalization(axis=3)(shortcut)
        else:
            # Later blocks connect the input tensor directly
            shortcut = x

        # Add the main path and the shortcut
        y = KL.Add()([y,shortcut])
        # Take the name suffix from Keras' per-prefix counter so every block
        # gets a distinct name. A random.randint(100,300) suffix can repeat,
        # and a repeated layer name makes model construction fail.
        y = KL.Activation('relu',name='last'+str(K.get_uid('last')))(y)
        return y
    return f

# ResNet backbone entry point
def ResNet_Extractor(inputs):
    """Apply the stem convolution and three stages of residual blocks.

    Args:
        inputs: Input tensor fed to the first convolution.

    Returns:
        The feature-map tensor produced by the last residual block.
    """
    # Stem: 3x3 convolution, batch normalization, ReLU
    stem = KL.Conv2D(filters=64,kernel_size=(3,3),padding='same')(inputs)
    stem = KL.BatchNormalization(axis=3)(stem)
    x = KL.Activation('relu')(stem)

    # Each entry is the number of blocks in one stage. The block index
    # restarts at 0 per stage and the filter count doubles stage by stage.
    blocks_per_stage = [2,2,2]
    for stage_index, block_count in enumerate(blocks_per_stage):
        stage_filters = 64 * 2 ** stage_index
        for block_id in range(block_count):
            x = building_block(filters=stage_filters,block=block_id)(x)
    return x
    
# Shared map and per-anchor RPN outputs
def RpnNet(featuremap, k=9):
    """Build the RPN heads on top of a feature map.

    Args:
        featuremap: Feature-map tensor the heads are attached to.
        k: Multiplier for the head widths; the classification convolution
            has ``2*k`` filters and the box convolution ``4*k`` filters.

    Returns:
        ``(rpn_classification, rpn_probability, rpn_BoundingBox)``: the class
        scores reshaped to (batch, -1, 2), their softmax, and the box
        outputs reshaped to (batch, -1, 4).
    """
    # Feature map -> shared layer; padding='same' keeps the spatial size
    shared = KL.Conv2D(filters=256,kernel_size=(3,3),padding='same',name='SSharemap')(featuremap)
    shared = KL.Activation('linear')(shared)

    # Foreground/background classification head
    class_scores = KL.Conv2D(filters=2*k,kernel_size=(1,1))(shared)
    # The reshape goes through a Lambda layer: keep the batch dimension,
    # let -1 absorb everything else, and end with 2 values per row
    class_scores = KL.Lambda(lambda t:tf.reshape(t,[tf.shape(t)[0],-1,2]))(class_scores)
    rpn_classification = KL.Activation('linear',name='rpn_classification')(class_scores)
    # Besides the raw scores, also output their softmax
    rpn_probability = KL.Activation('softmax',name='rpn_probability')(rpn_classification)

    # Box regression head
    box_deltas = KL.Conv2D(filters=4*k,kernel_size=(1,1))(shared)
    box_deltas = KL.Activation('linear')(box_deltas)
    rpn_BoundingBox = KL.Lambda(lambda t:tf.reshape(t,[tf.shape(t)[0],-1,4]),name='rpn_POS')(box_deltas)

    return rpn_classification,rpn_probability,rpn_BoundingBox

# x = KL.Input((64,64,3)) 
# featureMap = ResNet_Extractor(x)
# rpn_classification,rpn_probability,rpn_BoundingBox = RpnNet(featureMap,k=9)
# model = Model(inputs = [x],outputs=[rpn_classification,rpn_probability,rpn_BoundingBox])
# model.summary()
# plot_model(model=model,to_file='siezemap test.png',show_shapes=True)

1.5.1 rpn分類loss

def RPNClassLoss(rpn_match,rpn_Cal): # rpn_match holds the original input values
    """Classification loss over the entries whose match value is non-zero.

    Args:
        rpn_match: Match tensor; its last axis is squeezed away. Entries
            equal to 1 are labelled class 1, other non-zero entries class 0,
            and entries equal to 0 are left out of the loss.
        rpn_Cal: Class scores computed by the RPN, treated as logits.

    Returns:
        The mean sparse categorical cross-entropy over the selected
        entries, or the constant 0.0 when nothing is selected.
    """
    rpn_match = tf.squeeze(rpn_match,axis=-1) # axis=-1 is the last dimension
    # tf.where on a boolean tensor returns the coordinates of its True entries
    selected = tf.where(K.not_equal(rpn_match, 0))
    # True -> 1 where the match value equals 1, False -> 0 otherwise
    is_positive = K.cast(K.equal(rpn_match,1),tf.int32)

    labels = tf.gather_nd(is_positive, selected)  # labels from the original samples
    logits = tf.gather_nd(rpn_Cal, selected)      # values computed by the RPN

    # The sparse variant takes integer labels, so no one-hot encoding is needed
    per_anchor = K.sparse_categorical_crossentropy(target=labels,output=logits,from_logits=True)
    # Non-empty -> mean of the per-entry losses; empty -> 0.0
    mean_loss = K.mean(per_anchor)
    return K.switch(tf.size(per_anchor)>0, mean_loss, tf.constant(0.0))

1.5.2 rpn迴歸loss

# Small extraction helper
def batch_pack(x,counts,num_rows):
    """Concatenate the leading ``counts[i]`` entries of row ``i`` of ``x``.

    Rows 0 .. num_rows-1 are each sliced as ``x[i, :counts[i]]`` and the
    slices are joined along axis 0.
    """
    slices = [x[row, :counts[row]] for row in range(num_rows)]
    return tf.concat(slices,axis=0)
    
# Box position loss
def RpnBBoxLoss(target_bbox,rpn_match,rpn_bbox,num_rows=10):
    """Smooth-L1 loss between target boxes and the RPN boxes of positive entries.

    Args:
        target_bbox: Target box tensor; for each batch row the leading
            entries are paired with that row's positive entries.
        rpn_match: Match tensor; its last axis is squeezed away and entries
            equal to 1 mark the positive samples.
        rpn_bbox: Box values predicted by the RPN.
        num_rows: Number of batch rows to pack from ``target_bbox``.
            Defaults to 10, the value that used to be hard-coded; pass the
            actual batch size when it differs.

    Returns:
        The mean smooth-L1 loss, or the constant 0.0 when there is no
        positive entry.
    """
    rpn_match = tf.squeeze(input=rpn_match,axis=-1)
    indice =  tf.where(K.equal(x = rpn_match,y=1)) # positions of the positive samples

    rpn_bbox = tf.gather_nd(params=rpn_bbox,indices=indice) # RPN predictions at those positions

    # Number of positive entries per batch row, used to trim the targets
    batch_counts = K.sum(K.cast(K.equal(x = rpn_match,y=1),tf.int32),axis=-1)
    target_bbox = batch_pack(x= target_bbox,counts=batch_counts,num_rows=num_rows)

    # Loss: 0.5*d^2 where |d| < 1, otherwise |d| - 0.5
    diff = K.abs(target_bbox-rpn_bbox)
    less_than_one = K.cast(K.less(x = diff, y=1.0),tf.float32)
    loss = less_than_one * 0.5 * diff**2 + (1 - less_than_one)*(diff-0.5)

    # Non-empty -> mean; empty -> 0.0
    loss = K.switch(condition=tf.size(loss)>0,then_expression=K.mean(loss),else_expression=tf.constant(0.0))

    return loss

# Define the model inputs
# NOTE(review): input_bbox and input_class_ids are listed as model inputs
# below, but no layer in this snippet consumes them.
input_image = KL.Input(shape=[64,64,3],dtype=tf.float32)
input_bbox = KL.Input(shape=[None,4],dtype=tf.float32)
input_class_ids = KL.Input(shape = [None],dtype=tf.int32)    # map {'dog':0,'cat':1}
input_rpn_match = KL.Input(shape=[None,1],dtype=tf.int32)
input_rpn_bbox = KL.Input(shape=[None,4],dtype=tf.float32)

# Backbone feature map followed by the RPN heads
feature_map = ResNet_Extractor(input_image)
rpn_classification,rpn_probability,rpn_BoundingBox = RpnNet(feature_map,k=9)

# Wrap the two loss functions in Lambda layers so they are computed inside the graph
loss_rpn_class = KL.Lambda(lambda x:RPNClassLoss(*x),name='classloss')([input_rpn_match,rpn_classification])
loss_rpn_bbox = KL.Lambda(lambda x:RpnBBoxLoss(*x),name='bboxloss')([input_rpn_bbox,input_rpn_match,rpn_BoundingBox])

model = Model(inputs=[input_image,input_bbox,input_class_ids,input_rpn_match,input_rpn_bbox],
              outputs = [rpn_classification,rpn_probability,rpn_BoundingBox,loss_rpn_class,loss_rpn_bbox] )

# Custom loss wiring: fetch the loss layers by name and register their mean as model losses
loss_layer1 = model.get_layer('classloss').output
loss_layer2 = model.get_layer('bboxloss').output

model.add_loss(tf.reduce_mean(loss_layer1))
model.add_loss(tf.reduce_mean(loss_layer2))

# Every entry of `loss` is None, so compile() attaches no per-output loss here
model.compile(loss=[None]*len(model.outputs),
             optimizer=keras.optimizers.SGD(lr=0.00003))

model.summary()

2.kaggle貓狗分類

2.1 數據準備

貓狗圖下載：https://www.kaggle.com/c/dogs-vs-cats/data ，原始數據集包含25,000張貓狗圖（每個類別12,500個）大小爲543MB（壓縮後）。Keras己內建好的預訓練模型進行圖像分類, 包括:VGG16，VGG19，ResNet50，InceptionV3，InceptionResNetV2，Xception，MobileNet

from keras.applications import VGG16
# Instantiate a VGG16 model
conv_base = VGG16(weights='imagenet',
                  include_top=False, # tell Keras we only want the convolutional base, not the top classifier
                  input_shape=(150, 150, 3)) # declare the image size and number of colour channels to process

向構造函數傳遞了三個參數：
1.weights, 指定從哪個權重檢查點初始化模型
2.include_top, 指定模型最後是否包含密集連接分類器。默認情況下，這個密集連接分類器對應於ImageNet的1000個類別。因爲我們打算使用自己的分類器（只有兩個類別：cat和dog），所以不用包含。
3.input_shape, 輸入到網絡中的圖像張量（可選參數），如果不傳入這個參數，那麼網絡可以處理任意形狀的輸入
以下是VGG16“卷積基底conv_base”架構細節：

conv_base.summary() # 打印一下模型資訊

最後這個特徵圖形狀爲（4， 4， 512），這個特徵上面添加一個密集連接分類器。

2.2 不用數據增強快速特徵提取（計算代價低）

運行ImageDataGenerator實例，將圖像及其標籤提取爲Numpy數組，調用conv_base模型的predict方法從這些圖像的中提取特徵。

#特徵提取
import os
import numpy as np
from keras.preprocessing.image import ImageDataGenerator

# Dataset root and its train / validation / test sub-directories
base_dir = 'data/cats_and_dogs_small'
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')
test_dir = os.path.join(base_dir, 'test')

datagen = ImageDataGenerator(rescale=1./255) # create an image data generator object that rescales by 1/255

batch_size = 20 # number of images produced per batch

# Extract image features
def extract_features(directory, sample_count): # image directory, number of images to process
    """Run images from *directory* through ``conv_base`` and collect the results.

    Args:
        directory: Directory handed to ``datagen.flow_from_directory``.
        sample_count: Number of images to process.

    Returns:
        ``(features, labels)`` with shapes ``(sample_count, 4, 4, 512)`` and
        ``(sample_count,)``.
    """
    # The feature shape follows the output of the last conv_base layer
    features = np.zeros(shape=(sample_count, 4, 4, 512))
    labels = np.zeros(shape=(sample_count))

    # Generator over the images in the directory; each step yields one batch
    generator = datagen.flow_from_directory(
        directory,
        target_size=(150, 150),  # image height and width
        batch_size=batch_size,   # number of images per batch
        class_mode='binary')     # the target dataset has two classes (cat & dog)

    # Track how many rows are filled instead of assuming every batch holds
    # exactly batch_size images: a short batch, or a sample_count that is
    # not a multiple of batch_size, would otherwise make the slice
    # assignment fail with a shape mismatch.
    filled = 0
    for inputs_batch, labels_batch in generator:
        remaining = sample_count - filled
        if remaining <= 0 or len(inputs_batch) == 0:
            break
        take = min(remaining, len(inputs_batch))
        features_batch = conv_base.predict(inputs_batch) # extract features with the convolutional base
        features[filled : filled + take] = features_batch[:take] # store the features
        labels[filled : filled + take] = labels_batch[:take]     # store the labels
        filled += take
        if filled >= sample_count:
            # Note that since generators yield data indefinitely in a loop,
            # we must `break` once the requested number of images is stored.
            break
    print('extract_features complete!')
    return features, labels

train_features, train_labels = extract_features(train_dir, 2000) # feature extraction for the training images
validation_features, validation_labels = extract_features(validation_dir, 1000) # feature extraction for the validation images
test_features, test_labels = extract_features(test_dir, 1000) # feature extraction for the test images

2.2.1 flatten後接分類器

提取的特徵當前是（樣本數，4，4，512）的形狀。我們將它們餵給一個密集連接(densely-connected)的分類器，所以首先我們必須把它們壓扁(flatten)成（樣本數, 8192）:

# Flatten each (4, 4, 512) feature block into a single row of 4*4*512 values
flat_size = 4 * 4 * 512
train_features = train_features.reshape((2000, flat_size))
validation_features = validation_features.reshape((1000, flat_size))
test_features = test_features.reshape((1000, flat_size))

下面定義一個密集連接分類器，並在剛剛保存好的數據和標籤上訓練分類器：

from keras import models
from keras import layers
from keras import optimizers
# Create a new densely connected network to act as the classifier
model = models.Sequential()
model.add(layers.Dense(256, activation='relu', input_dim=4 * 4 * 512))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(1, activation='sigmoid')) # the dataset has only two classes (cat & dog)
model.compile(optimizer=optimizers.RMSprop(lr=2e-5),
              loss='binary_crossentropy',
              metrics=['acc'])

# Train on the features that were extracted beforehand by the convolutional base
history = model.fit(train_features, train_labels,
                    epochs=30,
                    batch_size=20,
                    validation_data=(validation_features, validation_labels))

訓練速度快，只需要處理兩個Dense層。看一下訓練過程中的損失和精度曲線：

import matplotlib.pyplot as plt
# Per-epoch metrics recorded by fit()
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(len(acc))

# Figure 1: training (dots) and validation (line) accuracy
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()

# Figure 2: training (dots) and validation (line) loss
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()

從圖中可以看出，驗證精度達到了約90%，比之前從一開始就訓練小型模型效果要好很多，但是從圖中也可以看出，雖然dropout比率比較大，但模型從一開始就出現了過擬合。這是因爲本方法沒有使用數據增強，而數據增強對防止小型圖片數據集過擬合非常重要。

2.3 用數據增強的特徵提取（計算代價高）

這種方法速度更慢，計算代價更高，但是可以在訓練期間使用數據增強。這種方法是：擴展conv_base模型，然後在輸入數據上端到端的運行模型（這種方法計算代價很高，必須在GPU上運行）

from keras import models
from keras import layers

model = models.Sequential() # create a new network model
model.add(conv_base)        # stack the pretrained convolutional base
model.add(layers.Flatten()) # flatten
model.add(layers.Dense(256, activation='relu'))  # add a new dense layer as the classifier
model.add(layers.Dense(1, activation='sigmoid')) # the dataset has only two classes (cat & dog)
model.summary()

VGG16的“卷積基底”有14,714,688個參數，非常大。上面添加的分類器有200萬個參數。在編譯和訓練模型之前，需要凍結卷積基。凍結一個或多個層是指在訓練過程中保持其權重不變（如果不這麼做，那麼卷積基之前學到的表示將會在訓練過程中被修改）。因爲其上添加的Dense是隨機初始化的，所以非常大的權重更新會在網絡中進行傳播，對之前學到的表示造成很大破壞。在Keras中，凍結網絡的方法是將其trainable屬性設置爲False：

# Check how many weights are trainable before freezing
print('This is the number of trainable weights '
      'before freezing the conv base:', len(model.trainable_weights))

# Freeze the convolutional base
conv_base.trainable = False

# Check again how many weights are trainable after freezing
print('This is the number of trainable weights '
      'after freezing the conv base:', len(model.trainable_weights))

2.3.1 凍結後用數據增強訓練

如此設置之後，只有添加的兩個Dense層的權重纔會被訓練，總共有4個權重張量，每層2個（主權重矩陣和偏置向量），注意的是，如果想修改權重屬性trainable，那麼應該修改好屬性之後再編譯模型。下面，我們可以訓練模型了，並使用數據增強的辦法：

from keras.preprocessing.image import ImageDataGenerator

# Training generator with augmentation: rotation, shifts, shear, zoom and horizontal flip
train_datagen = ImageDataGenerator(
      rescale=1./255,
      rotation_range=40,
      width_shift_range=0.2,
      height_shift_range=0.2,
      shear_range=0.2,
      zoom_range=0.2,
      horizontal_flip=True,
      fill_mode='nearest')

# Note: the validation/test data must not be augmented
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
        # directory of the image data
        train_dir,
        # image height and width
        target_size=(150, 150),
        batch_size=20,
        # the target dataset has only two classes (cat & dog)
        class_mode='binary')

validation_generator = test_datagen.flow_from_directory(
        validation_dir,
        target_size=(150, 150),
        batch_size=20,
        class_mode='binary')

# Compile after conv_base.trainable has been set, then train from the generators
model.compile(loss='binary_crossentropy',
              optimizer=optimizers.RMSprop(lr=2e-5),
              metrics=['acc'])

history = model.fit_generator(
      train_generator,
      steps_per_epoch=100,
      epochs=30,
      validation_data=validation_generator,
      validation_steps=50,
      verbose=2)

model.save('cats_and_dogs_small_3.h5') # save the model to a file

# Per-epoch metrics recorded during training
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(len(acc))

# Figure 1: training (dots) and validation (line) accuracy
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()

# Figure 2: training (dots) and validation (line) loss
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()

驗證精度到了將近96%，而且減少了過擬合（在訓練集上好，驗證測試集上差）

2.4 微調模型

以上兩種方法（2.2 與 2.3）都屬於特徵提取，下面使用模型微調進一步提高模型性能，步驟如下：
（1）在已經訓練好的基網絡（base network）上添加自定義網絡
（2）凍結基網絡
（3）訓練所添加的部分
（4）解凍基網絡的一些層
（5）聯合訓練解凍的這些層和添加的部分
在做特徵提取的時候已經完成了前三個步驟。我們繼續第四個步驟，先解凍conv_base，然後凍結其中的部分層。

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_1 (InputLayer)         (None, 150, 150, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 150, 150, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 150, 150, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 75, 75, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 75, 75, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 75, 75, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 37, 37, 128)       0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, 37, 37, 256)       295168    
_________________________________________________________________
block3_conv2 (Conv2D)        (None, 37, 37, 256)       590080    
_________________________________________________________________
block3_conv3 (Conv2D)        (None, 37, 37, 256)       590080    
_________________________________________________________________
block3_pool (MaxPooling2D)   (None, 18, 18, 256)       0         
_________________________________________________________________
block4_conv1 (Conv2D)        (None, 18, 18, 512)       1180160   
_________________________________________________________________
block4_conv2 (Conv2D)        (None, 18, 18, 512)       2359808   
_________________________________________________________________
block4_conv3 (Conv2D)        (None, 18, 18, 512)       2359808   
_________________________________________________________________
block4_pool (MaxPooling2D)   (None, 9, 9, 512)         0         
_________________________________________________________________
block5_conv1 (Conv2D)        (None, 9, 9, 512)         2359808   
_________________________________________________________________
block5_conv2 (Conv2D)        (None, 9, 9, 512)         2359808   
_________________________________________________________________
block5_conv3 (Conv2D)        (None, 9, 9, 512)         2359808   
_________________________________________________________________
block5_pool (MaxPooling2D)   (None, 4, 4, 512)         0         
=================================================================
Total params: 14,714,688
Trainable params: 14,714,688
Non-trainable params: 0

回顧這些層，我們將微調最後三個卷積層，直到block4_pool之前所有層都應該被凍結，後面三層來進行訓練。爲什麼不調整更多層? 爲什麼不調整整個“卷積基底”？我們可以，但是我們需要考慮：
1. “卷積基底”較前面的神經層所學習到的特徵表示更加通用(generic)，更具有可重複使用的特徵，而較高層次的特徵表示則聚焦獨特的特徵。微調這些聚焦獨特的特徵的神經層則更爲有用。
2. 我們訓練的參數越多，我們越有可能的過擬合(overfitting)。VGG16的“卷積基底”具有1千5百萬的參數，因此嘗試在小數據集上進行訓練是有風險的。

conv_base.trainable = True # unfreeze the convolutional base

# Every layer up to block4_pool stays frozen; block5_conv1, block5_conv2,
# block5_conv3 and block5_pool are unfrozen. Despite its name, this list
# holds the layers that are left trainable.
layers_frozen = ['block5_conv1','block5_conv2', 'block5_conv3', 'block5_pool']
for layer in conv_base.layers:
    layer.trainable = layer.name in layers_frozen

# Print the "trainable" flag of every layer
for layer in conv_base.layers:
    print("{}: {}".format(layer.name, layer.trainable))

2.4.1 微調並保存模型

現在可微調網絡了，我們將使用學習率非常小的RMSProp優化器來實現。之所以讓學習率很小，是因爲對於微調網絡的三層表示，我們希望其變化範圍不要太大，太大的權重可能會破壞這些表示。

# Recompile after changing the trainable flags, then fine-tune
model.compile(loss='binary_crossentropy',
              optimizer=optimizers.RMSprop(lr=1e-5), # use a small learning rate
              metrics=['acc'])

history = model.fit_generator(
      train_generator,
      steps_per_epoch=100,
      epochs=100,
      validation_data=validation_generator,
      validation_steps=50)

# Save the fine-tuned model to a file
model.save('cats_and_dogs_small_4.h5')

# Per-epoch metrics recorded during fine-tuning
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(len(acc))

# Figure 1: training (dots) and validation (line) accuracy
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()

# Figure 2: training (dots) and validation (line) loss
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()

這些曲線看起來包含噪音。爲了讓圖像更具有可讀性，可以讓每個損失和精度替換爲指數移動平均，從而讓曲線變得更加平滑，下面用一個簡單實用函數來實現：

def smooth_curve(points, factor=0.8):
  """Return the exponential moving average of *points* as a new list.

  The first point is kept unchanged; each later entry is
  ``previous * factor + point * (1 - factor)``, where ``previous`` is the
  preceding smoothed value. Empty input gives an empty list.
  """
  remaining = iter(points)
  try:
    running = next(remaining)
  except StopIteration:
    return []
  result = [running]
  for value in remaining:
    running = running * factor + value * (1 - factor)
    result.append(running)
  return result
# Figure 1: smoothed training (dots) and validation (line) accuracy
plt.plot(epochs,
         smooth_curve(acc), 'bo', label='Smoothed training acc')
plt.plot(epochs,
         smooth_curve(val_acc), 'b', label='Smoothed validation acc')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()

# Figure 2: smoothed training (dots) and validation (line) loss
plt.plot(epochs,
         smooth_curve(loss), 'bo', label='Smoothed training loss')
plt.plot(epochs,
         smooth_curve(val_loss), 'b', label='Smoothed validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()

通過指數移動平均，驗證曲線變得更清楚了。精度提高了1%，約從96%提高到了97%。
在測試數據上最終評估這個模型：

# Generator over the test directory, built from the non-augmenting test_datagen
test_generator = test_datagen.flow_from_directory(
        test_dir,
        target_size=(150, 150),
        batch_size=20,
        class_mode='binary')

# Evaluate over 50 batches of 20 images and report the accuracy
test_loss, test_acc = model.evaluate_generator(test_generator, steps=50)
print('test acc:', test_acc)

得到了差不多97%的測試精度，在關於這個數據集的原始Kaggle競賽中，這個結果是最佳結果之一。我們只是用了一小部分訓練數據（約10%）就得到了這個結果。訓練20000個樣本和訓練2000個樣本還是有很大差別的。

3.Caffe_SSD三字碼識別

3.1 check List

1.檢查 CUDA nvcc -V環境是否安裝正常如果不正常則去安裝 NVIDIA ，CUDA ，CUDNN （版本搭配）

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2019 NVIDIA Corporation
Built on Sun_Jul_28_19:07:16_PDT_2019
Cuda compilation tools, release 10.1, V10.1.243

由於Python2 即將落幕所以我們這次在Ubuntu18.04 自帶的 python3.6上進行
2.sudo ldconfig 檢查是否有軟連接沒有生效

3.2 正式安裝

1.依賴解決：

sudo apt-get install libprotobuf-dev libleveldb-dev libsnappy-dev libopencv-dev libhdf5-serial-dev protobuf-compiler
 
sudo apt-get install --no-install-recommends libboost-all-dev
 
sudo apt-get install libopenblas-dev liblapack-dev libatlas-base-dev
 
sudo apt-get install libgflags-dev libgoogle-glog-dev liblmdb-dev
 
sudo apt-get install git cmake build-essential

從源代碼編譯Opencv，進入官網 : http://opencv.org/releases.html , 下載 3.x系列解壓到你要安裝的位置，命令行進入已解壓的文件夾

mkdir build # 創建編譯的文件目錄
 
cd build
 
cmake -D CMAKE_BUILD_TYPE=Release -D CMAKE_INSTALL_PREFIX=/usr/local ..
 
make -j8  #編譯  注意自己的核數

在執行 make -j8 命令編譯到 92% 時可能會出現以下錯誤，是由於 opencv3.1 與 cuda8.0 不兼容導致的。解決辦法：修改 /opencv-3.1.0/modules/cudalegacy/src/graphcuts.cpp 文件內容，如圖：

編譯成功後安裝：sudo make install
安裝完成後通過查看 opencv 版本驗證是否安裝成功：pkg-config --modversion opencv

2.安裝caffe-SSD

git clone https://github.com/weiliu89/caffe.git
cd caffe
git checkout ssd

再次檢查依賴

sudo apt-get install libprotobuf-dev libleveldb-dev libsnappy-dev libopencv-dev libhdf5-serial-dev protobuf-compiler
sudo apt-get install --no-install-recommends libboost-all-dev
sudo apt-get install libatlas-base-dev python-dev
sudo apt-get install libgflags-dev libgoogle-glog-dev liblmdb-dev

3.修改Makefile.config文件,複製一份的原因是編譯 caffe 時需要的是 Makefile.config 文件，而Makefile.config.example 只是caffe 給出的配置文件例子，不能用來編譯 caffe。

cp Makefile.config.example Makefile.config
sudo vim Makefile.config

應用 cudnn

將第5行的 # 取消
#USE_CUDNN := 1
修改成： 
USE_CUDNN := 1

應用 opencv 版本

將第21行的 # 取消
#OPENCV_VERSION := 3 
修改爲： 
OPENCV_VERSION := 3

使用 python 接口

將第89行的 # 取消
#WITH_PYTHON_LAYER := 1 
修改爲 
WITH_PYTHON_LAYER := 1

修改 python 路徑

將 92/93行的 代碼修改如下  
INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include
LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib 
修改爲： 
INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include /usr/include/hdf5/serial
LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib /usr/lib/x86_64-linux-gnu /usr/lib/x86_64-linux-gnu/hdf5/serial

註釋Python2 切換python3
代碼修改如下否則會編譯Python.h and numpy/arrayobject.h. 出錯

將 67/68 行（實際行數可能稍有出入）的：
 PYTHON_INCLUDE := /usr/include/python2.7 \
                  /usr/lib/python2.7/dist-packages/numpy/core/include
修改爲： 
 # PYTHON_INCLUDE := /usr/include/python2.7 \
 #                 /usr/lib/python2.7/dist-packages/numpy/core/include 
 
 將 77/78 行的註釋解除，並更新爲 Python3.6（實際行數可能稍有出入）：
  # PYTHON_LIBRARIES := boost_python3 python3.5m
 # PYTHON_INCLUDE := /usr/include/python3.5m \
 #                 /usr/lib/python3.5/dist-packages/numpy/core/include
  PYTHON_LIBRARIES := boost_python3 python3.6m
  PYTHON_INCLUDE := /usr/include/python3.6m \
                   /usr/lib/python3.6/dist-packages/numpy/core/include

如果最後提示不支持compute_20,就把這句刪掉，最後效果是
nvcc fatal : Unsupported gpu architecture ‘compute_20’
Makefile:588: recipe for target ‘.build_release/cuda/src/caffe/solvers/sgd_solver.o’ failed
make: *** [.build_release/cuda/src/caffe/solvers/sgd_solver.o] Error 1
make: *** Waiting for unfinished jobs…

建議顯卡直接改成如下:
CUDA_ARCH := -gencode arch=compute_30,code=sm_30 \
             -gencode arch=compute_35,code=sm_35 \
             -gencode arch=compute_50,code=sm_50 \
             -gencode arch=compute_52,code=sm_52 \
             -gencode arch=compute_60,code=sm_60 \
             -gencode arch=compute_61,code=sm_61 \
             -gencode arch=compute_61,code=compute_61

然後修改caffe 目錄下的 Makefile 文件：注意不是Makefile.config文件

sudo vim Makefile
將第409行的：
NVCCFLAGS += -ccbin=$(CXX) -Xcompiler -fPIC $(COMMON_FLAGS)
替換爲：
NVCCFLAGS += -D_FORCE_INLINES -ccbin=$(CXX) -Xcompiler -fPIC $(COMMON_FLAGS)
將：181行替換爲如下
LIBRARIES += glog gflags protobuf boost_system boost_filesystem m hdf5_hl hdf5
改爲：
LIBRARIES += glog gflags protobuf boost_system boost_filesystem m hdf5_serial_hl hdf5_serial

將第265行替換如下（【可選項】我的環境太新，出了這個問題；當你出現以下錯誤時才需要修改：
.build_release/lib/libcaffe.so: undefined reference to `boost::re_detail_106501::put_mem_block(void*)'）

LIBRARIES += boost_thread stdc++
改爲：
LIBRARIES += boost_thread stdc++ boost_regex

4.下載python 環境依賴包　:
去到caffe根目錄中的python目錄中運行 …/caffe/python
5.設置系統python環境在末尾添加環境變量

vim ~/.bashrc
export PYTHONPATH="/opt/build/caffe/python"   # 此處爲caffe 的rootdir 目錄
source ~/.bashrc

6.編譯caffe保存開始編譯，在 caffe 目錄下執行如果出錯建議修改完畢使用 make clean 繼續Try

make all -j32  # -j32 代表以 32 個任務並行編譯，請依自己電腦的核數量力而行，後續不再重複說明
make test -j32
make pycaffe
make runtest -j32

若編譯 Caffe 時出現 “/usr/bin/ld: cannot find -lopenblas” 錯誤：
即使已經克隆了 OpenBLAS，仍需安裝以下基本包，才能鏈接到相應的庫（適用於 Ubuntu 14.04 和 16.04）。
apt install liblapack-dev liblapack3 libopenblas-base libopenblas-dev
apt install liblapack-dev liblapack3 libopenblas-base libopenblas-dev
到此安裝caffe 結束
7.更改caffe 源碼: https://blog.csdn.net/sinat_14916279/article/details/56489601

安裝參考：https://blog.csdn.net/lukaslong/article/details/81390276

【CV】Faster Rcnn，Kaggle貓狗分類，Caffe_SSD三字碼識別

文章目錄

1.複寫keras_faster rcnn

1.1 解析xml信息

1.2 Anchor生成

1.3 多輸出多輸入(函數式)

1.4 resnet

1.5 rpn

1.5.1 rpn分類loss

1.5.2 rpn迴歸loss

2.kaggle貓狗分類

2.1 數據準備

2.2 不用數據增強快速特徵提取（計算代價低）

2.2.1 flatten後接分類器

2.3 用數據增強的特徵提取（計算代價高）

2.3.1 凍結後用數據增強訓練

2.4 微調模型

2.4.1 微調並保存模型

3.Caffe_SSD三字碼識別

3.1 check List

3.2 正式安裝

Python實現大麥網搶票的四大關鍵技術點解析

salesforce零基礎學習（一百三十八）零碎知識點小總結（十）

關於接口協議，你必須要知道這些！

【C/C++】項目_9_文件傳輸系統（tcpput/getfile.cpp，tcpfileserver.cpp）

【CV】關於計算機視覺理論基礎知識全介紹

【C/C++】C基礎_4_數據類型轉換，結構體，格式化輸出，/main函數的參數，動態內存管理

【C/C++】C++基礎_2_運算符重載，類的繼承與派生，/類的多態，socket

【CV】Python+Opencv4+字符分割識別及車牌識別矯正

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結