Dogs vs. Cats

二分類問題
我的方案:

import os
import sys
import cv2
import random
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
import itertools
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

from keras.utils.np_utils import to_categorical # convert to one-hot-encoding
from keras.models import Sequential
from keras.models import load_model
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from keras.optimizers import RMSprop
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau
from keras.callbacks import EarlyStopping
from keras.preprocessing.image import load_img,img_to_array


import numpy as np
# Resolve dataset locations relative to the script's directory so the code
# works regardless of the current working directory and of the OS path
# separator (the original mixed './input/train/' with raw '\input\train'
# Windows-only strings).
root_path = sys.path[0]
train_path = './input/train/'
test_path = './input/test/'
# next(os.walk(dir))[2] is the list of plain file names directly inside dir.
train_list = next(os.walk(os.path.join(root_path, 'input', 'train')))[2]
test_list = next(os.walk(os.path.join(root_path, 'input', 'test')))[2]


# Input resolution fed to the network; every image is resized to this
# size when loaded (see load_batch_image).
IMAGE_WIDTH=224
IMAGE_HEIGHT=224


# train_path="../input/train/"
# test_path="../input/test/"
# train_list=next(os.walk(train_path))[2]
# test_list=next(os.walk(test_path))[2]

# 根據圖片路徑獲取圖片標籤
# Derive one-hot labels from image file paths (file names look like
# 'cat.123.jpg' / 'dog.456.jpg').
def get_img_label(img_paths):
    """Return one-hot class labels for the given image paths.

    Parameters
    ----------
    img_paths : iterable of str
        Paths whose base file name starts with the animal name,
        e.g. 'input/train/cat.123.jpg'.

    Returns
    -------
    numpy.ndarray, shape (len(img_paths), 2), float32
        [1, 0] for 'cat', [0, 1] for anything else (dog).
    """
    # os.path.basename instead of a hard-coded split on '/': robust to the
    # platform's path separator.
    labels = [0 if os.path.basename(p).split('.')[0] == 'cat' else 1
              for p in img_paths]
    # One-hot encode; equivalent to keras to_categorical(labels, 2) but
    # with no framework dependency.
    return np.eye(2, dtype='float32')[labels]


#讀取圖片
# Load one image from disk as a float array.
def load_batch_image(img_path, train_set=True, target_size=(IMAGE_WIDTH, IMAGE_HEIGHT)):
    """Load *img_path*, resized to *target_size*, as an array.

    Training images are returned unscaled because the ImageDataGenerator
    applies rescale=1/255 later; validation/test images are scaled to
    [0, 1] here instead.
    """
    arr = img_to_array(load_img(img_path, target_size=target_size))
    return arr if train_set else arr / 255.0


#建立一個數據迭代器
def get_dataset_shuffle(X_sample,batch_size,train_set=True):
    random.shuffle(X_sample)
    batch_num=int(len(X_sample)/batch_size)
    max_len=batch_num*batch_size
    X_sample=np.array(X_sample[:max_len])
    y_samples=get_img_label(X_sample)

    X_batches=np.split(X_sample,batch_num)
    y_batches=np.split(y_samples,batch_num)

    for i in range(len(X_batches)):
        if train_set:
            x=np.array(list(map(load_batch_image,X_batches[i],[True for _ in range(batch_size)])))

        else:
            x=np.array(list(map(load_batch_image,X_batches[i],[False for _ in range(batch_size)])))

        y=np.array(y_batches[i])

        yield x,y

# Data augmentation for training: rescale pixels to [0, 1] and apply small
# random geometric transforms so the model sees a slightly different
# version of each image every epoch (reduces overfitting).

train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    rotation_range=10,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)


def build_model():
    """Build and compile the CNN classifier.

    Architecture: two conv blocks (32 then 64 filters) each followed by
    max-pooling and dropout, then a 256-unit dense layer and a 2-way
    output head.

    Returns
    -------
    keras.models.Sequential
        Compiled model expecting (IMAGE_WIDTH, IMAGE_HEIGHT, 3) inputs
        and one-hot (cat/dog) targets.
    """
    model = Sequential()
    model.add(Conv2D(filters=32, kernel_size=(5, 5), padding='Same',
                     activation='relu', input_shape=(IMAGE_WIDTH, IMAGE_HEIGHT, 3)))
    model.add(Conv2D(filters=32, kernel_size=(5, 5), padding='Same',
                     activation='relu'))
    model.add(MaxPool2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Conv2D(filters=64, kernel_size=(3, 3), padding='Same',
                     activation='relu'))
    model.add(Conv2D(filters=64, kernel_size=(3, 3), padding='Same',
                     activation='relu'))
    model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Flatten())
    model.add(Dense(256, activation="relu"))
    model.add(Dropout(0.5))
    # Fix: softmax (not sigmoid) so the two outputs form a probability
    # distribution, matching the categorical_crossentropy loss and the
    # one-hot labels used for training.
    model.add(Dense(2, activation="softmax"))

    optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)

    model.compile(optimizer=optimizer, loss="categorical_crossentropy", metrics=["accuracy"])

    return model

def train():
    """Train the CNN on the images in train_path and save it to weight.h5.

    Splits the training file list 90/10 into train/validation, then runs a
    manual epoch loop that feeds augmented batches to train_on_batch and
    evaluates on the validation split after each epoch.
    """
    model=build_model()

    train_X=[train_path+item for item in train_list]
    # Set the random seed
    random_seed = 2

    # Split the train and the validation set for the fitting
    train_X, val_X = train_test_split(train_X, test_size = 0.1, random_state=random_seed)

    n_epoch=20
    batch_size=16
    for e in range(n_epoch):
        print('epoch',e)
        batch_num=0
        # Running [loss, accuracy] sum over the last reporting window.
        loss_sum=np.array([0.0,0.0])
        for X_train,y_train in get_dataset_shuffle(train_X,batch_size,True):
            # datagen.flow is an infinite generator; take exactly ONE
            # augmented version of this batch and break out manually.
            for X_batch,y_batch in train_datagen.flow(X_train,y_train,batch_size=batch_size):
                loss=model.train_on_batch(X_batch,y_batch)
                loss_sum+=loss
                batch_num+=1
                break  # manual break: one augmented pass per source batch

            # Report the mean loss/accuracy every 200 batches, then reset.
            if batch_num%200==0:
                print("epoch %s, batch %s: train_loss = %.4f, train_acc = %.4f" % (
                e, batch_num, loss_sum[0] / 200, loss_sum[1] / 200))
                loss_sum = np.array([0.0, 0.0])

        # Validation batches come pre-scaled (train_set=False), so no
        # augmentation pipeline is involved here.
        res=model.evaluate_generator(get_dataset_shuffle(val_X,batch_size,False),int(len(val_X)/batch_size))
        print("val_loss = %.4f, val_acc = %.4f: " % (res[0], res[1]))


    model.save('weight.h5')

def test():
    """Predict a class for every test image and write result1.csv.

    Loads the model saved by train(), runs each test image through it one
    at a time, and stores the argmax class index (0=cat, 1=dog) in the
    'label' column of the sample submission.
    """
    model = load_model('weight.h5')
    X_test_path = [test_path + item for item in test_list]

    results = []
    for path in X_test_path:
        # Add the batch dimension: (H, W, 3) -> (1, H, W, 3).
        x = np.expand_dims(load_batch_image(path, False), axis=0)
        # predict returns shape (1, 2); take the predicted class index.
        # Fix: collect flat ints instead of stacking (1, 2) arrays and
        # argmax-ing into an (N, 1) 2-D array, which does not assign
        # cleanly to a DataFrame column.
        results.append(int(np.argmax(model.predict(x), axis=1)[0]))

    # NOTE(review): this assumes os.walk lists the test files in the same
    # order as the ids in sample_submission.csv — verify, since string
    # ordering ('1', '10', '100', ...) usually differs from numeric order.
    test_df = pd.read_csv('./input/sample_submission.csv')
    test_df['label'] = results
    test_df.to_csv('result1.csv', index=False)

if __name__=='__main__':
    # Train the model first, then generate the submission file from it.
    train()
    test()




發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章