Aerial Cactus Identification(空中仙人掌鑑定)

Aerial Cactus Identification 空中仙人掌鑑定

二分類問題

方案一:

import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os,cv2
from IPython.display import Image
from keras.preprocessing import image
from keras import optimizers
from keras import layers,models
from keras.applications.imagenet_utils import preprocess_input
import matplotlib.pyplot as plt
import seaborn as sns
from keras import regularizers
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.vgg16 import VGG16

import numpy as np

# Paths to the Kaggle "Aerial Cactus Identification" data.
train_path = r'.\input\train'
test_path = r'.\input\test'
train = pd.read_csv(r'.\input\train.csv')
test_df = pd.read_csv(r'.\input\sample_submission.csv')
# flow_from_dataframe with class_mode='binary' expects string class labels.
train.has_cactus = train.has_cactus.astype(str)

# Data preparation: rescale pixel values into [0, 1].
datagen = ImageDataGenerator(rescale=1./255)
batch_size = 150

# First 15000 rows train, the remainder validate.
# (The original sliced train[:15001], which leaked row 15000 into BOTH the
# training and validation sets.)
train_generator = datagen.flow_from_dataframe(dataframe=train[:15000], directory=train_path, x_col='id',
                                              y_col='has_cactus', class_mode='binary', batch_size=batch_size,
                                              target_size=(150, 150))


validation_generator = datagen.flow_from_dataframe(dataframe=train[15000:], directory=train_path, x_col='id',
                                                   y_col='has_cactus', class_mode='binary', batch_size=50,
                                                   target_size=(150, 150))

def cnn_model():
    """Train a 4-conv-block CNN from scratch and write predictions to result2.csv.

    Uses the module-level train_generator / validation_generator (150x150
    inputs) and writes a Kaggle submission with a 0.75 decision threshold.
    """
    model = models.Sequential()
    # input_shape belongs only on the first layer; subsequent layers infer it.
    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)))
    model.add(layers.MaxPool2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPool2D((2, 2)))
    model.add(layers.Conv2D(128, (3, 3), activation='relu'))
    model.add(layers.MaxPool2D((2, 2)))
    model.add(layers.Conv2D(128, (3, 3), activation='relu'))
    model.add(layers.MaxPool2D((2, 2)))
    model.add(layers.Flatten())
    model.add(layers.Dense(512, activation='relu'))
    # Single sigmoid unit for binary classification.
    model.add(layers.Dense(1, activation='sigmoid'))

    # RMSprop is the canonical class name; the lowercase alias is deprecated.
    model.compile(loss='binary_crossentropy', optimizer=optimizers.RMSprop(), metrics=['acc'])

    epochs = 100
    history = model.fit_generator(train_generator, steps_per_epoch=100, epochs=epochs,
                                  validation_data=validation_generator, validation_steps=50)

    # Load the test images and resize each one to the 150x150 input the
    # network was trained on — the raw competition images are 32x32, so
    # feeding them unresized would fail at predict time.
    X_tst = []
    Test_imgs = []
    for img_id in os.listdir(test_path):
        img = cv2.imread(os.path.join(test_path, img_id))
        X_tst.append(cv2.resize(img, (150, 150)))
        Test_imgs.append(img_id)
    X_tst = np.asarray(X_tst).astype('float32') / 255

    test_predictions = model.predict(X_tst)

    # Build the submission frame directly instead of the deprecated
    # DataFrame.set_value loop (removed in pandas 1.0).
    sub_df = pd.DataFrame({'id': Test_imgs,
                           'has_cactus': (test_predictions.ravel() > 0.75).astype(int)})
    sub_df.to_csv('result2.csv', index=False)

def vgg16_model():
    """Placeholder for a VGG16 transfer-learning variant; not implemented yet."""


if __name__=='__main__':
    # Entry point: trains the scratch CNN and writes result2.csv
    # (vgg16_model above is an unimplemented stub).
    cnn_model()
    

方案二:VGG16遷移學習

import cv2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
import os
from tqdm import tqdm, tqdm_notebook
from keras.models import Sequential
from keras.layers import Activation, Dropout, Flatten, Dense
from keras.applications import VGG16
from keras.optimizers import Adam
import os

# Paths and metadata for the VGG16 transfer-learning variant.
train_path = r'.\input\train'
test_path = r'.\input\test'
train_df = pd.read_csv(r'.\input\train.csv')
test_df = pd.read_csv(r'.\input\sample_submission.csv')

# ImageNet-pretrained VGG16 body: 32x32 RGB inputs, classifier head removed,
# weights frozen so only the new dense head below is trained.
vgg16_net = VGG16(weights='imagenet',
                  include_top=False,
                  input_shape=(32, 32, 3))
vgg16_net.trainable = False

# Small dense classifier stacked on the frozen convolutional base.
model = Sequential([
    vgg16_net,
    Flatten(),
    Dense(256),
    Activation('relu'),
    Dropout(0.5),
    Dense(1),
    Activation('sigmoid'),
])

# Low learning rate keeps the fine-tuning stable.
model.compile(loss='binary_crossentropy',
              optimizer=Adam(lr=1e-5),
              metrics=['accuracy'])


# Load training images and labels by iterating the dataframe columns in
# lockstep. The original looked each label up with a boolean mask
# (train_df[train_df['id'] == img_id]), which is O(n) per image and O(n^2)
# over the whole training set.
X_tr = []
Y_tr = []
for img_id, label in zip(train_df['id'].values, train_df['has_cactus'].values):
    X_tr.append(cv2.imread(os.path.join(train_path, img_id)))
    Y_tr.append(label)
X_tr = np.asarray(X_tr).astype('float32') / 255
Y_tr = np.asarray(Y_tr)

batch_size = 32
nb_epoch = 1000

# NOTE(review): 1000 epochs with no early-stopping callback will overfit;
# presumably training was stopped by hand — confirm before rerunning.
history = model.fit(X_tr, Y_tr,
                    batch_size=batch_size,
                    epochs=nb_epoch,
                    validation_split=0.1,
                    shuffle=True,
                    verbose=2)

"""
with open('history.json', 'w') as f:
    json.dump(history.history, f)

history_df = pd.DataFrame(history.history)
history_df[['loss', 'val_loss']].plot()
history_df[['acc', 'val_acc']].plot()
plt.show()
"""

X_tst = []
Test_imgs = []
for img_id in os.listdir(test_path):
    X_tst.append(cv2.imread(os.path.join(test_path,img_id)))
    Test_imgs.append(img_id)
X_tst = np.asarray(X_tst)
X_tst = X_tst.astype('float32')
X_tst /= 255

test_predictions = model.predict(X_tst)

sub_df = pd.DataFrame(test_predictions, columns=['has_cactus'])
sub_df['has_cactus'] = sub_df['has_cactus'].apply(lambda x: 1 if x > 0.75 else 0)

sub_df['id'] = ''
cols = sub_df.columns.tolist()
cols = cols[-1:] + cols[:-1]
sub_df=sub_df[cols]
for i, img in enumerate(Test_imgs):
    sub_df.set_value(i,'id',img)

sub_df.to_csv('result2.csv',index=False)


我的方案:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
import itertools
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

from keras.utils.np_utils import to_categorical # convert to one-hot-encoding
from keras.models import Sequential
from keras.models import load_model
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from keras.optimizers import RMSprop
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau
import cv2
import os
from keras.callbacks import EarlyStopping

# Paths and metadata for the third ("my") solution.
train_path = r'.\input\train'
test_path = r'.\input\test'
train_df = pd.read_csv(r'.\input\train.csv')
test_df = pd.read_csv(r'.\input\sample_submission.csv')

# Load training images and labels, iterating the id/label columns directly
# instead of positional indexing inside a range(len(...)) loop.
# cv2.imread already returns an ndarray, so no extra np.array wrapper.
X_train = []
Y_train = []
for img_id, label in zip(train_df['id'], train_df['has_cactus']):
    X_train.append(cv2.imread(os.path.join(train_path, img_id)))
    Y_train.append(label)

X_train = np.array(X_train) / 255.0
# One-hot encode the 0/1 labels for the 2-way softmax head.
Y_train = to_categorical(np.array(Y_train), num_classes=2)

# Fixed seed so the train/validation split is reproducible.
random_seed = 2

# Hold out 10% of the data for validation.
X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.1, random_state=random_seed)

model_name = 'cnn_model.h5'
def train():
    """Build, augment, train, and save a small CNN; plot its learning curves.

    Reads the module-level X_train/Y_train/X_val/Y_val split and saves the
    fitted model to `model_name`.
    """
    # Two conv blocks (32 then 64 filters), each followed by pooling + dropout.
    model = Sequential()
    model.add(Conv2D(filters=32, kernel_size=(5, 5), padding='Same',
                     activation='relu', input_shape=(32, 32, 3)))
    model.add(Conv2D(filters=32, kernel_size=(5, 5), padding='Same',
                     activation='relu'))
    model.add(MaxPool2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Conv2D(filters=64, kernel_size=(3, 3), padding='Same',
                     activation='relu'))
    model.add(Conv2D(filters=64, kernel_size=(3, 3), padding='Same',
                     activation='relu'))
    model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Flatten())
    model.add(Dense(256, activation="relu"))
    model.add(Dropout(0.5))
    # 2-way softmax to match the one-hot labels produced by to_categorical.
    model.add(Dense(2, activation="softmax"))
    # summary() prints itself and returns None, so don't wrap it in print().
    model.summary()

    # Define the optimizer.
    optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)

    # Compile the model.
    model.compile(optimizer=optimizer, loss="categorical_crossentropy", metrics=["accuracy"])

    # Halve the learning rate whenever validation accuracy plateaus.
    learning_rate_reduction = ReduceLROnPlateau(monitor='val_acc',
                                                patience=3,
                                                verbose=1,
                                                factor=0.5,
                                                min_lr=0.00001)

    epochs = 300  # EarlyStopping below normally halts training much earlier.
    batch_size = 32

    # Light data augmentation: small rotations, zooms, and shifts only
    # (flips disabled).
    datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=10,  # randomly rotate images in the range (degrees, 0 to 180)
        zoom_range=0.1,  # Randomly zoom image
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=False,  # randomly flip images
        vertical_flip=False)  # randomly flip images

    datagen.fit(X_train)
    # Stop once validation loss stops improving by at least 0.001 for 5 epochs.
    el = EarlyStopping(min_delta=0.001, patience=5)
    # Fit the model on augmented batches, validating on the held-out split.
    history = model.fit_generator(datagen.flow(X_train, Y_train, batch_size=batch_size),
                                  epochs=epochs, validation_data=(X_val, Y_val),
                                  verbose=2, steps_per_epoch=X_train.shape[0] // batch_size,
                                  callbacks=[learning_rate_reduction, el])

    model.save(model_name)

    # Plot training vs. validation loss and accuracy.
    # (The original passed axes=ax[0] into ax[0].plot, which is redundant and
    # rejected by newer matplotlib; it also bound the legends to an unused
    # variable.)
    fig, ax = plt.subplots(2, 1)
    ax[0].plot(history.history['loss'], color='b', label="Training loss")
    ax[0].plot(history.history['val_loss'], color='r', label="validation loss")
    ax[0].legend(loc='best', shadow=True)

    ax[1].plot(history.history['acc'], color='b', label="Training accuracy")
    ax[1].plot(history.history['val_acc'], color='r', label="Validation accuracy")
    ax[1].legend(loc='best', shadow=True)
    plt.show()

def test():
    """Load the saved model, predict the test images, and write result.csv."""
    X_test = []
    # Iterate ids directly; cv2.imread already returns an ndarray, so the
    # original np.array wrapper around it was redundant.
    for img_id in test_df.id:
        X_test.append(cv2.imread(os.path.join(test_path, img_id)))

    X_test = np.array(X_test) / 255.0
    model = load_model(model_name)
    # argmax over the two softmax outputs yields the 0/1 class label.
    Y_pred = np.argmax(model.predict(X_test), axis=1)
    test_df.has_cactus = Y_pred
    test_df.to_csv('result.csv', index=False)


if __name__=='__main__':
    # Only runs inference; run train() first so cnn_model.h5 exists on disk.
    test()
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章