A Deep-Learning Face Recognition System: CNN Implementations (VIPLFaceNet, VGGNet, Xception, ResNet50, ResNet18) with AM-Softmax Loss



train.py

#!/usr/bin/env python
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, CSVLogger, ModelCheckpoint
# DO NOT REMOVE THIS:
from model.cnn_models import *
from utils.data_generator import DataGenerator
from model.amsoftmax import wrap_cnn, amsoftmax_loss

input_shape = (64, 64, 1)
batch_size = 64
num_epochs = 1000
# patience: number of epochs with no improvement on the monitored quantity before training is stopped
patience = 100
log_file_path = "./log.csv"
cnn = "ResNet18"
trained_models_path = "./trained_models/" + cnn

generator = DataGenerator(dataset="olivettifaces",
                          path="./data/olivetti_faces/olivettifaces.jpg",
                          batch_size=batch_size,
                          input_size=input_shape,
                          is_shuffle=True,
                          data_augmentation=10,
                          validation_split=.2)

num_classes, num_images, training_set_size, validation_set_size = generator.get_number()
# 40 classes, 8400 images, training set size 6720, validation set size 1680
print("get_number:",num_classes, num_images, training_set_size, validation_set_size)

# eval() evaluates a string expression and returns its value.
# Here eval("ResNet18") resolves to the ResNet18() function defined in ./model/cnn_models.py
# (a safer registry-based alternative is sketched below).
model = wrap_cnn(model=eval(cnn),
                 feature_layer="feature",
                 input_shape=input_shape,
                 num_classes=num_classes)
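
# Note (not in the original script): a dictionary lookup is a safer way to
# resolve the model by name than eval(), since a typo fails fast with a KeyError:
#   MODEL_REGISTRY = {"ResNet18": ResNet18, "ResNet50": ResNet50,
#                     "VGGNet": VGGNet, "tiny_XCEPTION": tiny_XCEPTION,
#                     "VIPL_FaceNet": VIPL_FaceNet}
#   model = wrap_cnn(model=MODEL_REGISTRY[cnn], feature_layer="feature",
#                    input_shape=input_shape, num_classes=num_classes)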

model.compile(optimizer='adam',
              loss=amsoftmax_loss,  # custom AM-Softmax loss
              metrics=['accuracy'])
model.summary()

# Plot the network topology
from keras.utils import plot_model  # same Keras namespace as the model above
# If the script is run from a prompt opened at the default path
# (e.g. C:\Users\Administrator), the image is saved to that working directory.
plot_model(model, to_file="model.png")  # write the topology diagram to disk
from IPython.display import Image  # display the diagram (inside IPython/Jupyter)
Image("model.png")

# callbacks
early_stop = EarlyStopping('loss', 0.1, patience=patience)
reduce_lr = ReduceLROnPlateau('loss', factor=0.1, patience=int(patience / 2), verbose=1)
csv_logger = CSVLogger(log_file_path, append=False)
# Placeholders such as epoch and accuracy must match keys of the logs dict
# produced during training (accuracy, loss, ...), otherwise a KeyError is raised.
model_names = trained_models_path + '.{epoch:02d}-{accuracy:.2f}.hdf5'
model_checkpoint = ModelCheckpoint(model_names,
                                   monitor='loss',
                                   verbose=1,
                                   save_best_only=True,
                                   save_weights_only=False)
callbacks = [model_checkpoint, csv_logger, early_stop, reduce_lr]

# train model by generator
model.fit_generator(generator=generator.flow('train'),
                    steps_per_epoch=int(training_set_size / batch_size),
                    epochs=num_epochs,
                    verbose=1,
                    callbacks=callbacks,
                    validation_data=generator.flow('validate'),
                    validation_steps=int(validation_set_size / batch_size)
                    )



"""
Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
==================================================================================================
input (InputLayer)              (None, 64, 64, 1)    0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 70, 70, 1)    0           input[0][0]                      
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 32, 32, 64)   3200        conv1_pad[0][0]                  
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 32, 32, 64)   256         conv1[0][0]                      
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 32, 32, 64)   0           batch_normalization_1[0][0]      
__________________________________________________________________________________________________
pool1_pad (ZeroPadding2D)       (None, 34, 34, 64)   0           activation_1[0][0]               
__________________________________________________________________________________________________
max_pooling2d_1 (MaxPooling2D)  (None, 16, 16, 64)   0           pool1_pad[0][0]                  
__________________________________________________________________________________________________
zero_padding2d_1 (ZeroPadding2D (None, 18, 18, 64)   0           max_pooling2d_1[0][0]            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 8, 8, 64)     36928       zero_padding2d_1[0][0]           
__________________________________________________________________________________________________
batch_normalization_2 (BatchNor (None, 8, 8, 64)     256         conv2d_1[0][0]                   
__________________________________________________________________________________________________
activation_2 (Activation)       (None, 8, 8, 64)     0           batch_normalization_2[0][0]      
__________________________________________________________________________________________________
zero_padding2d_2 (ZeroPadding2D (None, 10, 10, 64)   0           activation_2[0][0]               
__________________________________________________________________________________________________
conv2d_2 (Conv2D)               (None, 8, 8, 64)     36928       zero_padding2d_2[0][0]           
__________________________________________________________________________________________________
conv2d_3 (Conv2D)               (None, 8, 8, 64)     4160        max_pooling2d_1[0][0]            
__________________________________________________________________________________________________
batch_normalization_3 (BatchNor (None, 8, 8, 64)     256         conv2d_2[0][0]                   
__________________________________________________________________________________________________
batch_normalization_4 (BatchNor (None, 8, 8, 64)     256         conv2d_3[0][0]                   
__________________________________________________________________________________________________
add_1 (Add)                     (None, 8, 8, 64)     0           batch_normalization_3[0][0]      
                                                                 batch_normalization_4[0][0]      
__________________________________________________________________________________________________
activation_3 (Activation)       (None, 8, 8, 64)     0           add_1[0][0]                      
__________________________________________________________________________________________________
zero_padding2d_3 (ZeroPadding2D (None, 10, 10, 64)   0           activation_3[0][0]               
__________________________________________________________________________________________________
conv2d_4 (Conv2D)               (None, 8, 8, 64)     36928       zero_padding2d_3[0][0]           
__________________________________________________________________________________________________
batch_normalization_5 (BatchNor (None, 8, 8, 64)     256         conv2d_4[0][0]                   
__________________________________________________________________________________________________
activation_4 (Activation)       (None, 8, 8, 64)     0           batch_normalization_5[0][0]      
__________________________________________________________________________________________________
zero_padding2d_4 (ZeroPadding2D (None, 10, 10, 64)   0           activation_4[0][0]               
__________________________________________________________________________________________________
conv2d_5 (Conv2D)               (None, 8, 8, 64)     36928       zero_padding2d_4[0][0]           
__________________________________________________________________________________________________
batch_normalization_6 (BatchNor (None, 8, 8, 64)     256         conv2d_5[0][0]                   
__________________________________________________________________________________________________
add_2 (Add)                     (None, 8, 8, 64)     0           batch_normalization_6[0][0]      
                                                                 activation_3[0][0]               
__________________________________________________________________________________________________
activation_5 (Activation)       (None, 8, 8, 64)     0           add_2[0][0]                      
__________________________________________________________________________________________________
zero_padding2d_5 (ZeroPadding2D (None, 10, 10, 64)   0           activation_5[0][0]               
__________________________________________________________________________________________________
conv2d_6 (Conv2D)               (None, 4, 4, 128)    73856       zero_padding2d_5[0][0]           
__________________________________________________________________________________________________
batch_normalization_7 (BatchNor (None, 4, 4, 128)    512         conv2d_6[0][0]                   
__________________________________________________________________________________________________
activation_6 (Activation)       (None, 4, 4, 128)    0           batch_normalization_7[0][0]      
__________________________________________________________________________________________________
zero_padding2d_6 (ZeroPadding2D (None, 6, 6, 128)    0           activation_6[0][0]               
__________________________________________________________________________________________________
conv2d_7 (Conv2D)               (None, 4, 4, 128)    147584      zero_padding2d_6[0][0]           
__________________________________________________________________________________________________
conv2d_8 (Conv2D)               (None, 4, 4, 128)    8320        activation_5[0][0]               
__________________________________________________________________________________________________
batch_normalization_8 (BatchNor (None, 4, 4, 128)    512         conv2d_7[0][0]                   
__________________________________________________________________________________________________
batch_normalization_9 (BatchNor (None, 4, 4, 128)    512         conv2d_8[0][0]                   
__________________________________________________________________________________________________
add_3 (Add)                     (None, 4, 4, 128)    0           batch_normalization_8[0][0]      
                                                                 batch_normalization_9[0][0]      
__________________________________________________________________________________________________
activation_7 (Activation)       (None, 4, 4, 128)    0           add_3[0][0]                      
__________________________________________________________________________________________________
zero_padding2d_7 (ZeroPadding2D (None, 6, 6, 128)    0           activation_7[0][0]               
__________________________________________________________________________________________________
conv2d_9 (Conv2D)               (None, 4, 4, 128)    147584      zero_padding2d_7[0][0]           
__________________________________________________________________________________________________
batch_normalization_10 (BatchNo (None, 4, 4, 128)    512         conv2d_9[0][0]                   
__________________________________________________________________________________________________
activation_8 (Activation)       (None, 4, 4, 128)    0           batch_normalization_10[0][0]     
__________________________________________________________________________________________________
zero_padding2d_8 (ZeroPadding2D (None, 6, 6, 128)    0           activation_8[0][0]               
__________________________________________________________________________________________________
conv2d_10 (Conv2D)              (None, 4, 4, 128)    147584      zero_padding2d_8[0][0]           
__________________________________________________________________________________________________
batch_normalization_11 (BatchNo (None, 4, 4, 128)    512         conv2d_10[0][0]                  
__________________________________________________________________________________________________
add_4 (Add)                     (None, 4, 4, 128)    0           batch_normalization_11[0][0]     
                                                                 activation_7[0][0]               
__________________________________________________________________________________________________
activation_9 (Activation)       (None, 4, 4, 128)    0           add_4[0][0]                      
__________________________________________________________________________________________________
zero_padding2d_9 (ZeroPadding2D (None, 6, 6, 128)    0           activation_9[0][0]               
__________________________________________________________________________________________________
conv2d_11 (Conv2D)              (None, 2, 2, 256)    295168      zero_padding2d_9[0][0]           
__________________________________________________________________________________________________
batch_normalization_12 (BatchNo (None, 2, 2, 256)    1024        conv2d_11[0][0]                  
__________________________________________________________________________________________________
activation_10 (Activation)      (None, 2, 2, 256)    0           batch_normalization_12[0][0]     
__________________________________________________________________________________________________
zero_padding2d_10 (ZeroPadding2 (None, 4, 4, 256)    0           activation_10[0][0]              
__________________________________________________________________________________________________
conv2d_12 (Conv2D)              (None, 2, 2, 256)    590080      zero_padding2d_10[0][0]          
__________________________________________________________________________________________________
conv2d_13 (Conv2D)              (None, 2, 2, 256)    33024       activation_9[0][0]               
__________________________________________________________________________________________________
batch_normalization_13 (BatchNo (None, 2, 2, 256)    1024        conv2d_12[0][0]                  
__________________________________________________________________________________________________
batch_normalization_14 (BatchNo (None, 2, 2, 256)    1024        conv2d_13[0][0]                  
__________________________________________________________________________________________________
add_5 (Add)                     (None, 2, 2, 256)    0           batch_normalization_13[0][0]     
                                                                 batch_normalization_14[0][0]     
__________________________________________________________________________________________________
activation_11 (Activation)      (None, 2, 2, 256)    0           add_5[0][0]                      
__________________________________________________________________________________________________
zero_padding2d_11 (ZeroPadding2 (None, 4, 4, 256)    0           activation_11[0][0]              
__________________________________________________________________________________________________
conv2d_14 (Conv2D)              (None, 2, 2, 256)    590080      zero_padding2d_11[0][0]          
__________________________________________________________________________________________________
batch_normalization_15 (BatchNo (None, 2, 2, 256)    1024        conv2d_14[0][0]                  
__________________________________________________________________________________________________
activation_12 (Activation)      (None, 2, 2, 256)    0           batch_normalization_15[0][0]     
__________________________________________________________________________________________________
zero_padding2d_12 (ZeroPadding2 (None, 4, 4, 256)    0           activation_12[0][0]              
__________________________________________________________________________________________________
conv2d_15 (Conv2D)              (None, 2, 2, 256)    590080      zero_padding2d_12[0][0]          
__________________________________________________________________________________________________
batch_normalization_16 (BatchNo (None, 2, 2, 256)    1024        conv2d_15[0][0]                  
__________________________________________________________________________________________________
add_6 (Add)                     (None, 2, 2, 256)    0           batch_normalization_16[0][0]     
                                                                 activation_11[0][0]              
__________________________________________________________________________________________________
activation_13 (Activation)      (None, 2, 2, 256)    0           add_6[0][0]                      
__________________________________________________________________________________________________
zero_padding2d_13 (ZeroPadding2 (None, 4, 4, 256)    0           activation_13[0][0]              
__________________________________________________________________________________________________
conv2d_16 (Conv2D)              (None, 1, 1, 512)    1180160     zero_padding2d_13[0][0]          
__________________________________________________________________________________________________
batch_normalization_17 (BatchNo (None, 1, 1, 512)    2048        conv2d_16[0][0]                  
__________________________________________________________________________________________________
activation_14 (Activation)      (None, 1, 1, 512)    0           batch_normalization_17[0][0]     
__________________________________________________________________________________________________
zero_padding2d_14 (ZeroPadding2 (None, 3, 3, 512)    0           activation_14[0][0]              
__________________________________________________________________________________________________
conv2d_17 (Conv2D)              (None, 1, 1, 512)    2359808     zero_padding2d_14[0][0]          
__________________________________________________________________________________________________
conv2d_18 (Conv2D)              (None, 1, 1, 512)    131584      activation_13[0][0]              
__________________________________________________________________________________________________
batch_normalization_18 (BatchNo (None, 1, 1, 512)    2048        conv2d_17[0][0]                  
__________________________________________________________________________________________________
batch_normalization_19 (BatchNo (None, 1, 1, 512)    2048        conv2d_18[0][0]                  
__________________________________________________________________________________________________
add_7 (Add)                     (None, 1, 1, 512)    0           batch_normalization_18[0][0]     
                                                                 batch_normalization_19[0][0]     
__________________________________________________________________________________________________
activation_15 (Activation)      (None, 1, 1, 512)    0           add_7[0][0]                      
__________________________________________________________________________________________________
zero_padding2d_15 (ZeroPadding2 (None, 3, 3, 512)    0           activation_15[0][0]              
__________________________________________________________________________________________________
conv2d_19 (Conv2D)              (None, 1, 1, 512)    2359808     zero_padding2d_15[0][0]          
__________________________________________________________________________________________________
batch_normalization_20 (BatchNo (None, 1, 1, 512)    2048        conv2d_19[0][0]                  
__________________________________________________________________________________________________
activation_16 (Activation)      (None, 1, 1, 512)    0           batch_normalization_20[0][0]     
__________________________________________________________________________________________________
zero_padding2d_16 (ZeroPadding2 (None, 3, 3, 512)    0           activation_16[0][0]              
__________________________________________________________________________________________________
conv2d_20 (Conv2D)              (None, 1, 1, 512)    2359808     zero_padding2d_16[0][0]          
__________________________________________________________________________________________________
batch_normalization_21 (BatchNo (None, 1, 1, 512)    2048        conv2d_20[0][0]                  
__________________________________________________________________________________________________
add_8 (Add)                     (None, 1, 1, 512)    0           batch_normalization_21[0][0]     
                                                                 activation_15[0][0]              
__________________________________________________________________________________________________
activation_17 (Activation)      (None, 1, 1, 512)    0           add_8[0][0]                      
__________________________________________________________________________________________________
feature (GlobalAveragePooling2D (None, 512)          0           activation_17[0][0]              
__________________________________________________________________________________________________
dropout_1 (Dropout)             (None, 512)          0           feature[0][0]                    
__________________________________________________________________________________________________
predictions (AMSoftmax)         (None, 40)           20480       dropout_1[0][0]                  
==================================================================================================
Total params: 11,209,536
Trainable params: 11,199,808
Non-trainable params: 9,728
__________________________________________________________________________________________________
"""

cnn_models.py

from keras import layers
from keras.layers import Activation, Dropout, Conv2D, Dense
from keras.layers import BatchNormalization
from keras.layers import Flatten
from keras.layers import GlobalAveragePooling2D
from keras.layers import InputLayer, Input
from keras.layers import MaxPooling2D
from keras.layers import SeparableConv2D
from keras.layers import ZeroPadding2D, Add
from keras.models import Model
from keras.models import Sequential
from keras.regularizers import l2


# From VIPLFaceNet.
# Paper: https://arxiv.org/abs/1609.03892
def VIPL_FaceNet(input_shape, num_classes):
    model = Sequential()
    model.add(InputLayer(input_shape=input_shape,
                         name="input"))

    # Conv layer 1: output (55, 55, 48) for the paper's 227x227 input (smaller here)
    model.add(Conv2D(
        kernel_size=(9, 9),
        activation="relu",
        filters=48,
        strides=(4, 4)
    ))
    # pool1
    model.add(MaxPooling2D((3, 3), strides=(2, 2), padding='same'))

    # Conv layer 2 output shape (27, 27, 128)
    model.add(Conv2D(
        strides=(1, 1),
        kernel_size=(3, 3),
        activation="relu",
        filters=128
    ))

    # Conv layer 3
    model.add(Conv2D(
        strides=(1, 1),
        kernel_size=(3, 3),
        activation="relu",
        filters=128
    ))

    # pool2
    model.add(MaxPooling2D((3, 3), strides=(2, 2), padding='same'))

    # conv4
    model.add(Conv2D(
        kernel_size=(3, 3),
        activation="relu",
        filters=256,
        padding="same",
        strides=(1, 1)
    ))

    # conv5
    model.add(Conv2D(
        kernel_size=(3, 3),
        activation="relu",
        filters=192,
        padding="same",
        strides=(1, 1)
    ))

    # conv6
    model.add(Conv2D(
        kernel_size=(3, 3),
        activation="relu",
        filters=192,
        padding="same",
        strides=(1, 1)
    ))

    # conv7
    model.add(Conv2D(
        kernel_size=(3, 3),
        activation="relu",
        filters=128,
        padding="same",
        strides=(1, 1)
    ))

    # pool3
    model.add(MaxPooling2D((3, 3), strides=(2, 2), padding='same'))

    # fully connected layer 1
    model.add(Flatten())
    model.add(Dense(4096))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.5))

    # fully connected layer 2
    model.add(Dense(2048))
    model.add(BatchNormalization())
    model.add(Activation('relu', name="feature"))
    model.add(Dropout(0.5))

    # output
    model.add(Dense(num_classes))
    model.add(Activation('softmax', name='predictions'))

    # return
    return model


# VGGNet implementation
def VGGNet(input_shape, num_classes):
    # VGGNet is deep and uses many max-pooling stages, so a small input_shape
    # is not recommended; the original input_shape is (224, 224, 3).
    # (This assert also means VGGNet cannot be used with the (64, 64, 1)
    # input configured in train.py.)
    assert input_shape[0] >= 224 and input_shape[1] >= 224

    model = Sequential()
    model.add(InputLayer(input_shape=input_shape,
                         name="input"))

    # Conv1,2
    model.add(Conv2D(
        kernel_size=(3, 3),
        activation="relu",
        filters=64,
        strides=(1, 1)))
    model.add(Conv2D(
        kernel_size=(3, 3),
        activation="relu",
        filters=64,
        strides=(1, 1)))

    # pool1
    model.add(MaxPooling2D((2, 2), strides=(2, 2),
                           padding='same'))

    # Conv 3,4
    model.add(Conv2D(
        kernel_size=(3, 3),
        activation="relu",
        filters=128,
        strides=(1, 1)))
    model.add(Conv2D(
        kernel_size=(3, 3),
        activation="relu",
        filters=128,
        strides=(1, 1)))

    # pool2
    model.add(MaxPooling2D((2, 2), strides=(2, 2),
                           padding='same'))

    # Conv 5-7
    model.add(Conv2D(
        kernel_size=(3, 3),
        activation="relu",
        filters=256,
        strides=(1, 1)))
    model.add(Conv2D(
        kernel_size=(3, 3),
        activation="relu",
        filters=256,
        strides=(1, 1)))
    model.add(Conv2D(
        kernel_size=(3, 3),
        activation="relu",
        filters=256,
        strides=(1, 1)))

    # pool3
    model.add(MaxPooling2D((2, 2), strides=(2, 2),
                           padding='same'))
    # Conv 8-10
    model.add(Conv2D(
        kernel_size=(3, 3),
        activation="relu",
        filters=512,
        strides=(1, 1)))

    model.add(Conv2D(
        kernel_size=(3, 3),
        activation="relu",
        filters=512,
        strides=(1, 1)))
    model.add(Conv2D(
        kernel_size=(3, 3),
        activation="relu",
        filters=512,
        strides=(1, 1)))
    # pool4
    model.add(MaxPooling2D((2, 2), strides=(2, 2),
                           padding='same'))
    # Conv 11-13
    model.add(Conv2D(
        kernel_size=(3, 3),
        activation="relu",
        filters=512,
        strides=(1, 1)))
    model.add(Conv2D(
        kernel_size=(3, 3),
        activation="relu",
        filters=512,
        strides=(1, 1)))
    model.add(Conv2D(
        kernel_size=(3, 3),
        activation="relu",
        filters=512,
        strides=(1, 1)))

    # pool5
    model.add(MaxPooling2D((2, 2), strides=(2, 2), padding='same'))

    # fully connected layer 1
    model.add(Flatten())
    model.add(Dense(2048))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.5))

    # fully connected layer 2
    model.add(Dense(2048))
    model.add(BatchNormalization())
    model.add(Activation('relu', name="feature"))
    model.add(Dropout(0.5))

    model.add(Dense(num_classes))
    model.add(Activation('softmax', name='predictions'))

    return model


# Face classification.
# Source: https://github.com/oarriaga/face_classification/blob/master/src/models/cnn.py
def tiny_XCEPTION(input_shape, num_classes, l2_regularization=0.01):
    regularization = l2(l2_regularization)

    # base
    img_input = Input(input_shape, name="input")
    x = Conv2D(5, (3, 3), strides=(1, 1), kernel_regularizer=regularization,
               use_bias=False)(img_input)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Conv2D(5, (3, 3), strides=(1, 1), kernel_regularizer=regularization,
               use_bias=False)(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    # module 1
    residual = Conv2D(8, (1, 1), strides=(2, 2),
                      padding='same', use_bias=False)(x)
    residual = BatchNormalization()(residual)

    x = SeparableConv2D(8, (3, 3), padding='same',
                        kernel_regularizer=regularization,
                        use_bias=False)(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = SeparableConv2D(8, (3, 3), padding='same',
                        kernel_regularizer=regularization,
                        use_bias=False)(x)
    x = BatchNormalization()(x)

    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
    x = layers.add([x, residual])

    # module 2
    residual = Conv2D(16, (1, 1), strides=(2, 2),
                      padding='same', use_bias=False)(x)
    residual = BatchNormalization()(residual)

    x = SeparableConv2D(16, (3, 3), padding='same',
                        kernel_regularizer=regularization,
                        use_bias=False)(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = SeparableConv2D(16, (3, 3), padding='same',
                        kernel_regularizer=regularization,
                        use_bias=False)(x)
    x = BatchNormalization()(x)

    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
    x = layers.add([x, residual])

    # module 3
    residual = Conv2D(32, (1, 1), strides=(2, 2),
                      padding='same', use_bias=False)(x)
    residual = BatchNormalization()(residual)

    x = SeparableConv2D(32, (3, 3), padding='same',
                        kernel_regularizer=regularization,
                        use_bias=False)(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = SeparableConv2D(32, (3, 3), padding='same',
                        kernel_regularizer=regularization,
                        use_bias=False)(x)
    x = BatchNormalization()(x)

    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
    x = layers.add([x, residual])

    # module 4
    residual = Conv2D(64, (1, 1), strides=(2, 2),
                      padding='same', use_bias=False)(x)
    residual = BatchNormalization()(residual)

    x = SeparableConv2D(64, (3, 3), padding='same',
                        kernel_regularizer=regularization,
                        use_bias=False)(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = SeparableConv2D(64, (3, 3), padding='same',
                        kernel_regularizer=regularization,
                        use_bias=False)(x)
    x = BatchNormalization()(x)

    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
    x = layers.add([x, residual])

    x = Conv2D(1024, (3, 3),
               # kernel_regularizer=regularization,
               padding='same')(x)
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, name="feature")(x)
    x = Dropout(.5)(x)
    x = Dense(num_classes)(x)
    output = Activation('softmax', name='predictions')(x)
    model = Model(img_input, output)

    return model
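
# Why "tiny": SeparableConv2D factorizes a convolution into a depthwise 3x3
# pass plus a 1x1 pointwise mix. A rough parameter count for a 3x3 kernel,
# 8 input channels, 8 filters, no bias (plain arithmetic, not from the post):
#   SeparableConv2D: 3*3*8 (depthwise) + 8*8 (pointwise) = 136 parameters
#   Conv2D:          3*3*8*8                             = 576 parameters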


def ResNet50(input_shape, num_classes):
    # Wrap the ResNet50 from keras.applications rather than re-implementing such a deep network.
    from keras.applications.resnet50 import ResNet50
    input_tensor = Input(shape=input_shape, name="input")
    x = ResNet50(include_top=False,
                 weights=None,
                 input_tensor=input_tensor,
                 input_shape=None,
                 pooling="avg",
                 classes=num_classes)
    x = Dense(units=2048, name="feature")(x.output)
    return Model(inputs=input_tensor, outputs=x)


# ResNet building blocks.
# The paper defines two block types, the basic block and the bottleneck block;
# only the basic block is implemented here (a sketch of the bottleneck variant
# follows basic_block below).
def basic_block(filters, kernel_size=3, is_first_block=True):
    stride = 1
    if is_first_block:
        stride = 2

    def f(x):
        # f(x) named y
        # 1st Conv
        y = ZeroPadding2D(padding=1)(x)
        y = Conv2D(filters, kernel_size, strides=stride, kernel_initializer='he_normal')(y)
        y = BatchNormalization()(y)
        y = Activation("relu")(y)
        # 2nd Conv
        y = ZeroPadding2D(padding=1)(y)
        y = Conv2D(filters, kernel_size, kernel_initializer='he_normal')(y)
        y = BatchNormalization()(y)

        # f(x) + x
        if is_first_block:
            shortcut = Conv2D(filters, kernel_size=1, strides=stride, kernel_initializer='he_normal')(x)
            shortcut = BatchNormalization()(shortcut)
        else:
            shortcut = x

        y = Add()([y, shortcut])
        y = Activation("relu")(y)

        return y

    return f
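
# The note above mentions a bottleneck block, which this repository does not
# implement. A minimal sketch of it (an assumption following the ResNet paper's
# 1x1 -> 3x3 -> 1x1 design; not used by ResNet18 below):
def bottleneck_block(filters, is_first_block=True):
    stride = 2 if is_first_block else 1

    def f(x):
        # 1x1 reduce
        y = Conv2D(filters, kernel_size=1, strides=stride, kernel_initializer='he_normal')(x)
        y = BatchNormalization()(y)
        y = Activation("relu")(y)
        # 3x3 conv
        y = ZeroPadding2D(padding=1)(y)
        y = Conv2D(filters, kernel_size=3, kernel_initializer='he_normal')(y)
        y = BatchNormalization()(y)
        y = Activation("relu")(y)
        # 1x1 expand to 4 * filters channels
        y = Conv2D(filters * 4, kernel_size=1, kernel_initializer='he_normal')(y)
        y = BatchNormalization()(y)

        if is_first_block:
            shortcut = Conv2D(filters * 4, kernel_size=1, strides=stride,
                              kernel_initializer='he_normal')(x)
            shortcut = BatchNormalization()(shortcut)
        else:
            shortcut = x  # assumes x already has filters * 4 channels

        y = Add()([y, shortcut])
        return Activation("relu")(y)

    return f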


# ResNet v1; see the paper at:
# https://arxiv.org/abs/1512.03385
def ResNet18(input_shape, num_classes):
    input_layer = Input(shape=input_shape, name="input")

    # Conv1
    x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(input_layer)
    x = layers.Conv2D(64, (7, 7),
                      strides=(2, 2),
                      padding='valid',
                      kernel_initializer='he_normal',
                      name='conv1')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    x = layers.ZeroPadding2D(padding=(1, 1), name='pool1_pad')(x)
    x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)

    # Conv2
    x = basic_block(filters=64)(x)
    x = basic_block(filters=64, is_first_block=False)(x)

    # Conv3
    x = basic_block(filters=128)(x)
    x = basic_block(filters=128, is_first_block=False)(x)

    # Conv4
    x = basic_block(filters=256)(x)
    x = basic_block(filters=256, is_first_block=False)(x)

    # Conv5
    x = basic_block(filters=512)(x)
    x = basic_block(filters=512, is_first_block=False)(x)

    x = GlobalAveragePooling2D(name="feature")(x)
    output_layer = Dense(num_classes, activation='softmax')(x)

    model = Model(input_layer, output_layer)
    return model
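
# A minimal smoke test (not in the original post): for a 64x64x1 input, the
# global-average-pooled "feature" vector should be 512-d, matching the model
# summary shown under train.py above.
if __name__ == "__main__":
    m = ResNet18(input_shape=(64, 64, 1), num_classes=40)
    print(m.get_layer("feature").output_shape)  # expect (None, 512)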

amsoftmax.py

import tensorflow as tf
from keras import backend as K
from keras.layers import Dropout
from keras.engine.topology import Layer
from keras.models import Model


class AMSoftmax(Layer):
    def __init__(self, units, **kwargs):
        self.units = units
        self.kernel = None
        super(AMSoftmax, self).__init__(**kwargs)

    # Lifecycle note: Class(init args)(call args) calls the instance right after
    # it is created, so execution order is __init__, then build, then call.
    def build(self, input_shape):
        assert len(input_shape) >= 2
        self.kernel = self.add_weight(name='kernel',
                                      shape=(input_shape[1], self.units),
                                      initializer='uniform',
                                      trainable=True)
        super(AMSoftmax, self).build(input_shape)

    # serves as the model's output layer
    def call(self, inputs, **kwargs):
        # cosine similarity:
        # cosine = x * w / (||x|| * ||w||)
        inputs = K.l2_normalize(inputs, axis=1)
        kernel = K.l2_normalize(self.kernel, axis=0)
        cosine = K.dot(inputs, kernel)
        return cosine

    def compute_output_shape(self, input_shape):
        return input_shape[0], self.units

    def get_config(self):
        config = {
            'units': self.units}
        base_config = super(AMSoftmax, self).get_config()

        return dict(list(base_config.items())
                    + list(config.items()))
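
# Because call() L2-normalizes both the inputs and the kernel, this layer's
# outputs are cosine scores in [-1, 1] rather than unbounded logits; the scale
# s and margin m are applied later, inside amsoftmax_loss. A hypothetical quick
# check (not part of this file):
#   inp = Input(shape=(4,))
#   scores = Model(inp, AMSoftmax(units=3)(inp)).predict(np.random.randn(2, 4))
#   assert np.all(np.abs(scores) <= 1.0 + 1e-5)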


# Adapted from: https://github.com/hao-qiang/AM-Softmax/blob/master/AM-Softmax.ipynb
def amsoftmax_loss(y_true, y_pred, scale=30.0, margin=0.35):
    # define two constant tensors: margin m and scale s
    m = K.constant(margin, name='m')
    s = K.constant(scale, name='s')
    # reshape the labels into a column vector of class indices
    label = K.reshape(K.argmax(y_true, axis=-1), shape=(-1, 1))
    label = K.cast(label, dtype=tf.int32)
    # row index of each sample in the batch
    pred_batch = K.reshape(tf.range(K.shape(y_pred)[0]), shape=(-1, 1))
    # concatenate the two column vectors: batch row indices and label indices
    ground_truth_indices = tf.concat([pred_batch,
                                      K.reshape(label, shape=(-1, 1))], axis=1)
    """
    參考自:https://tensorflow.google.cn/api_docs/python/tf/gather_nd?hl=en
    tf.gather_nd(params, indices, batch_dims=0, name=None)
    根據indices索引對params矩陣/向量進行元素操作
        indices = [[0, 0], [1, 1]]
        params = [['a', 'b'], ['c', 'd']]
        output = ['a', 'd']
        
        indices = [[1], [0]]
        params = [['a', 'b'], ['c', 'd']]
        output = [['c', 'd'], ['a', 'b']]
    """
    # get ground truth scores by indices
    ground_truth_scores = tf.gather_nd(y_pred, ground_truth_indices)

    # If ground_truth_scores > m, then ground_truth_scores = ground_truth_scores - m.
    # K.greater(x, y) returns a boolean tensor, elementwise x > y.
    added_margin = K.cast(K.greater(ground_truth_scores, m), dtype=tf.float32) * m
    added_margin = K.reshape(added_margin, shape=(-1, 1))
    # tf.subtract: subtract the margin from the target logits, then scale by s
    added_embedding_feature = tf.subtract(y_pred, y_true * added_margin) * s
    # softmax cross entropy between the labels and the margined, scaled logits
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_true, logits=added_embedding_feature)
    loss = tf.reduce_mean(cross_entropy)
    return loss
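
# In formula form this is AM-Softmax (Wang et al., 2018):
#   L = -log( exp(s * (cos(theta_y) - m)) /
#             (exp(s * (cos(theta_y) - m)) + sum_{j != y} exp(s * cos(theta_j))) )
# with the implementation detail (from the referenced notebook) that the margin
# is subtracted only when the target cosine already exceeds m. A hand check for
# one sample with hypothetical numbers, in plain NumPy:
#   s, m = 30.0, 0.35
#   y_true  = np.array([0., 1.])          # one-hot label
#   cosines = np.array([0.2, 0.9])        # 0.9 > m, so the margin applies
#   logits  = (cosines - y_true * m) * s  # [6.0, 16.5]
#   loss    = -(logits[1] - np.log(np.exp(logits).sum()))  # tiny, ~2.7e-5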


def wrap_cnn(model, feature_layer, input_shape, num_classes):
    cnn = model(input_shape, num_classes)
    assert isinstance(cnn, Model)
    # take the output of the layer named feature (GlobalAveragePooling2D in ResNet18)
    x = cnn.get_layer(name=feature_layer).output
    x = Dropout(.5)(x)
    # 1. Lifecycle: __init__, then build, then call (see the AMSoftmax layer above).
    # 2. predictions (AMSoftmax) becomes the model output; train it together with
    #    the custom amsoftmax_loss.
    output_layer = AMSoftmax(num_classes, name="predictions")(x)
    return Model(inputs=cnn.input, outputs=output_layer)


def load_model(filepath):
    import keras.models
    model = keras.models.load_model(filepath,
                                    {"AMSoftmax": AMSoftmax,
                                     "amsoftmax_loss": amsoftmax_loss})
    return model

evaluate.py

#!/usr/bin/env python
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import roc_curve, auc

from data.olivetti_faces.split_img import split_to_dataset
from model.amsoftmax import load_model
from utils.feature import get_feature_function
from utils.measure import kappa, cosine_similarity

model_path = "./trained_models/tiny_XCEPTION.hdf5"
img_path = "./data/olivetti_faces/olivettifaces.jpg"
test_data_path = "./olive"
input_shape = (64, 64, 1)

def classifier():
    model = load_model(filepath=model_path)
    files = list(os.walk(test_data_path))[0][2]
    x_list = []
    total = 0
    correct = 0
    matrix = np.zeros(shape=(20, 20))
    for file in files:
        label = file.split("_")[0].replace("olive", "")
        img = cv2.imread(os.path.join(test_data_path, file))
        img = cv2.resize(img, (64, 64))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img = img / 256
        img = np.expand_dims(img, -1)
        img = np.expand_dims(img, 0)
        x_list.append(img)
        y = model.predict(x=img)
        y = int(np.argmax(y) / 2)

        y_correct = int(label)
        total += 1
        if y == y_correct:
            correct += 1
        matrix[y_correct][y] += 1

    k = kappa(matrix=matrix)
    print("total is {0}, precise is {1}, kappa is {2}."
          .format(total, correct / total, k))


def recognition():
    # This threshold is used to determine if two face images belong to the same person.
    threshold = 0.80

    model = load_model(filepath=model_path)
    f = get_feature_function(model)
    base_feature = f(cv2.imread("./olive/0_0.jpg"))

    y_true = []
    for i in range(200):
        if i < 10:
            y_true.append(1)  # True
        else:
            y_true.append(0)  # False
    y_score = []
    for label in range(20):
        for photo in range(10):
            file = "./olive/" + str(label) + "_" + str(photo) + ".jpg"
            img_feature = f(cv2.imread(file))
            sim = cosine_similarity(base_feature, img_feature)
            print("label:{0} - {1} ,sim : {2}".format(label, photo, sim))
            if sim > threshold:
                y_score.append(1)  # True
            else:
                y_score.append(0)  # False
    correct = 0
    for i in range(200):
        if y_true[i] == y_score[i]:
            correct += 1

    print("acc is " + str(correct / 200))
    fpr, tpr, t = roc_curve(y_true, y_score)
    roc_auc = auc(fpr, tpr)

    plt.figure()
    lw = 2
    plt.figure(figsize=(10, 10))
    plt.plot(fpr, tpr, color='darkorange',
             lw=lw, label='ROC curve (area = %0.2f)' % roc_auc)
    plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='-.')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC curve')
    plt.legend(loc="lower right")
    plt.show()


if __name__ == '__main__':
    if not os.path.exists(test_data_path):
        os.mkdir(test_data_path)
    # generate_more_faces(, test_data_path)
    split_to_dataset(img_path, test_data_path)
    recognition()
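
utils/feature.py is not listed in this post. Judging from how get_feature_function is used (evaluate.py passes a loaded model, predict.py and web.py pass a file path, and the returned function maps a BGR image to an embedding vector), a plausible sketch looks like this:

# Plausible sketch of utils/feature.py (an assumption, not shown in the post):
import cv2
import numpy as np
from keras.models import Model
from model.amsoftmax import load_model

def get_feature_function(model):
    if isinstance(model, str):  # predict.py and web.py pass a file path
        model = load_model(model)
    # Truncate the network at the "feature" layer so predict() returns embeddings.
    extractor = Model(inputs=model.input,
                      outputs=model.get_layer("feature").output)

    def f(img):
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img = cv2.resize(img, (64, 64)) / 256  # same preprocessing as classifier()
        img = img[np.newaxis, :, :, np.newaxis]
        return extractor.predict(img)[0]

    return f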

predict.py

import os
import cv2
from utils.feature import get_feature_function
from utils.measure import *

model_path = "./trained_models/tiny_XCEPTION.hdf5"

def main():
    get_feature = get_feature_function(model=model_path)
    features = []
    base_feature = None
    dir_list = list(list(os.walk("./data/manual_testing"))[0])[2]
    dir_list.sort()
    for file in dir_list:
        path = "./data/manual_testing/" + file
        img = cv2.imread(path)
        feature = get_feature(img)
        features.append((file, feature))
        if file == "base.jpg":
            base_feature = feature

    for file, feature in features:
        print(file, '\t',
              cosine_similarity(feature, base_feature), '\t',
              euclidean_metric(feature, base_feature), '\t',
              pearson_correlation(feature, base_feature))


if __name__ == '__main__':
    main()

"""
base.jpg 	 1.0000000596046448 	 1.0 	 0.99999994
base_full.jpg 	 0.8650757670402527 	 0.0014323909546049918 	 0.73107094
lena.jpg 	 0.7991840243339539 	 0.0010432298559344395 	 0.59929365
man1.jpg 	 0.6370709836483002 	 0.0031209503507164146 	 0.2738905
man1_2.jpg 	 0.5672858357429504 	 0.002656866875466825 	 0.13452913
man2.jpg 	 0.48662818130105734 	 0.0027977924693378836 	 -0.026793957
woman1_crop.jpg 	 0.8841563165187836 	 0.00508468014331387 	 0.76889646
woman1_full.jpg 	 0.8546876013278961 	 0.0016342116788881774 	 0.70945275
woman2_crop.jpg 	 0.7393457293510437 	 0.002660944596353025 	 0.47877395
woman2_full.jpg 	 0.8256216049194336 	 0.0009339273743781038 	 0.65218705
woman3_crop.jpg 	 0.8040041327476501 	 0.003071616047994001 	 0.6079379
woman3_full.jpg 	 0.8212800323963165 	 0.0013429052489530274 	 0.6433183
woman4_crop.jpg 	 0.696440264582634 	 0.0032429208812613745 	 0.3926806
woman4_full.jpg 	 0.8566377758979797 	 0.0015620006726975552 	 0.71384853
woman5.jpg 	 0.9146099090576172 	 0.001529003613932187 	 0.8297442
woman5_crop.jpg 	 0.9101028144359589 	 0.005750450657876265 	 0.8201517
"""
web.py

import cv2
from tempfile import SpooledTemporaryFile
import numpy as np
from flask import Flask
from flask import request
from utils.feature import get_feature_function
from utils.measure import cosine_similarity

model_path = "./trained_models/tiny_XCEPTION.hdf5"
get_feature = get_feature_function(model=model_path)

app = Flask(__name__, static_folder="web_static")
# If we saved uploads to disk, we would need the following configuration:
# upload_folder = './web_static/uploads/'
# app.config['UPLOAD_FOLDER'] = upload_folder
app.config['ALLOWED_EXTENSIONS'] = {'png', 'jpg', 'jpeg', 'gif', 'bmp'}


@app.route("/")
def hello():
    return "Hello, SmooFaceEngine!"


@app.route("/test")
def test():
    html = '''
        <!DOCTYPE html>
        <html lang="en">
        <head>
            <meta charset="UTF-8">
            <title>Uploading</title>
        </head>
        <body>
            <form action="/data" method="post" enctype="multipart/form-data">
                <input type="file" name="pic1" value="Pic1" /><br>
                <input type="file" name="pic2" value="Pic2" /><br>
                <input type="submit" value="upload">
            </form>
        </body>
        </html>
    '''
    return html


def get_feature_from_client(request_filename):
    # If we wanted to save the upload to disk, we could use the following
    # code. But writing every client upload to disk makes the program slow,
    # so it is kept here only as a reference.
    """
    import random
    def get_random_string(length):
        string = ""
        for i in range(0, length):
            code = random.randint(97, 122)
            string += chr(code)
        return string

    pic = request.files[request_filename]
    img_type = pic.filename.split('.')[1]
    filename = get_random_string(10) + "." + img_type
    filepath = os.path.join(app.root_path,
                            app.config['UPLOAD_FOLDER'],
                            filename)
    pic.save(filepath)
    vector = get_feature(filepath)
    os.unlink(filepath)

    return vector
    """

    # The code below keeps the uploaded file in memory instead, which is much
    # faster than round-tripping through the filesystem.
    file = request.files[request_filename]
    stream = file.stream
    # for old version flask:
    """
     if isinstance(stream, SpooledTemporaryFile):
         stream = stream.file
    """
    value = bytearray(stream.read())
    value = np.asarray(value, dtype='uint8')
    img = cv2.imdecode(value, 1)
    vector = get_feature(img)
    return vector


@app.route("/data", methods=["POST"])
def predict():
    vector1 = get_feature_from_client('pic1')
    vector2 = get_feature_from_client('pic2')
    similarity = cosine_similarity(vector1, vector2)
    return str(similarity)


if __name__ == "__main__":
    app.run(host='0.0.0.0', port=8080, debug=True)
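
With the server running, the /data endpoint can be exercised through the /test upload form above, or from a small client script (a sketch; the two image paths are placeholders):

# Hypothetical client for the /data endpoint above:
import requests

with open("face1.jpg", "rb") as pic1, open("face2.jpg", "rb") as pic2:
    response = requests.post("http://127.0.0.1:8080/data",
                             files={"pic1": pic1, "pic2": pic2})
print(response.text)  # cosine similarity between the two uploaded faces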
