TF中建立vgg16並實現七分類

實驗綜述

1.基於vgg16模型進行遷移學習;因爲總體數據集偏小,所以只訓練了最後一層的參數,而凍結所有其他層;當然當數據集增大時,可以向前多解凍幾層網絡進行訓練。

2.總體的實施步驟爲:

             (1)處理數據

             (2)建立vgg16網絡模型類

             (3)利用網絡模型類實現模型複用、載入數據、訓練模型與權重保存

              (4)使用訓練好的模型進行分類

 

 

3.所用數據集爲自行拍攝的照片,共七類,每類約30張圖;且每類圖像在各自文件夾中,對應於類名對各自文件夾命名。

4.所實現的vgg16結構爲:

input->conv->conv->pool->conv->conv->pool->conv->conv->conv->pool->conv->conv->conv->pool->conv->conv->conv->pool->

fc->fc->fc->softmax->output

處理數據

對於數據進行旋轉、加噪聲實現數據的擴增,具體代碼如下。

命名爲 預處理.py

import os
import cv2
import random
import numpy as np

#  添加椒鹽噪聲輔助函數
#  prob:噪聲比例
def sp_noise(image, prob):
  """Return a salt-and-pepper corrupted copy of ``image``.

  One ``random.random()`` draw is made for every (row, column) position, in
  row-major order.  A draw below ``prob`` leaves the pixel black (0), a draw
  above ``1 - prob`` makes it white (255), anything in between keeps the
  source pixel.

  Args:
    image: array whose first two axes are iterated as rows and columns.
    prob: noise ratio; used as the lower threshold, ``1 - prob`` as the
      upper one.

  Returns:
    A new ``np.uint8`` array with the same shape as ``image``.
  """
  noisy = np.zeros(image.shape, np.uint8)
  upper = 1 - prob
  for row, col in np.ndindex(*image.shape[:2]):
    draw = random.random()
    if draw < prob:
      # "Pepper": the buffer is already zero-filled, nothing to write.
      continue
    noisy[row, col] = 255 if draw > upper else image[row, col]
  return noisy


# 定義預處理函數,實現在原目錄下批量處理圖像
def alter(path):
  """Augment every readable image found directly in ``path``, in place.

  Each image is resized to 224x224 and written back into ``path`` six times,
  numbered by a running counter ``<n>``: the resized image (``<n>.jpg``), a
  horizontal flip (``<n>h.jpg``), a vertical flip (``<n>v.jpg``), both flips
  (``<n>hv.jpg``), a 90-degree rotation (``<n>nt.jpg``) and a salt-and-pepper
  noised copy (``<n>sp.jpg``).  Every written path is printed.

  All source images are loaded before anything is written, so an output such
  as ``0.jpg`` can never overwrite a source image that has not been read yet.
  Entries that ``cv2.imread`` cannot decode (it returns ``None``) are skipped
  instead of crashing ``cv2.resize``.

  Args:
    path: directory holding the images of ONE class.
  """
  # Pass 1: read and resize everything up front (see docstring for why).
  resized_images = []
  for entry in os.listdir(path):
    document = os.path.join(path, entry)
    img = cv2.imread(document)
    if img is None:
      print("skip (not a readable image): " + document)
      continue
    resized_images.append(cv2.resize(img, (224, 224)))

  # Pass 2: derive the variants and write them out.
  for count, img in enumerate(resized_images):
    rows, cols = img.shape[:2]
    rotation = cv2.getRotationMatrix2D((cols / 2, rows / 2), 90, 1)
    variants = [
        ("", img),                                          # resized original
        ("h", cv2.flip(img, 1)),                            # horizontal mirror
        ("v", cv2.flip(img, 0)),                            # vertical mirror
        ("hv", cv2.flip(img, -1)),                          # both mirrors
        ("nt", cv2.warpAffine(img, rotation, (cols, rows))),  # 90-degree turn
        ("sp", sp_noise(img, 0.03)),                        # salt-and-pepper
    ]
    for suffix, variant in variants:
      new_path = os.path.join(path, str(count)) + suffix + ".jpg"
      cv2.imwrite(new_path, variant)
      print(new_path)
# Run the batch augmentation on the images under the given path.
# NOTE: the path must be one class sub-folder; edit it and re-run once for every class folder.
alter('D:\\M\\twentyfive\\')

建立vgg16網絡模型類

命名爲vgg_model.py文件(文件名須與後文的 import vgg_model 一致,不能使用連字符)

import tensorflow as tf
import numpy as np
import os
from vgg_preprocessing import preprocess_for_train  # 此處引進tf官方的vgg圖像預處理模塊


class Vgg16:
    """VGG16 graph builder (TensorFlow 1.x API) used for transfer learning.

    The network is thirteen 3x3 convolutions arranged in five max-pooled
    groups, followed by three fully connected layers.  Every layer is created
    frozen (``trainable=False``) except the last fully connected layer, fc8.

    Attributes:
        parameters: ``[weights, biases, ...]`` variables in layer-creation
            order; ``load_weights`` assigns pre-trained values by position.
        imgs: the input tensor handed to the constructor.
        fc8: raw class scores (logits) of the last layer, no activation.
        probs: ``softmax(fc8)``.
    """

    def __init__(self, imgs, num_classes=7):
        """Build the whole graph on top of ``imgs``.

        Args:
            imgs: image batch tensor (the scripts in this file feed a
                ``[None, 224, 224, 3]`` float32 placeholder).
            num_classes: width of the final layer fc8; defaults to the seven
                classes of this experiment.
        """
        self.parameters = []  # filled by conv()/fc(), consumed by load_weights()
        self.imgs = imgs
        self.num_classes = num_classes
        self.convlayers()
        self.fc_layers()
        self.probs = tf.nn.softmax(self.fc8)

    def saver(self):
        """Return a new ``tf.train.Saver`` for storing the model."""
        return tf.train.Saver()

    def maxpool(self, name, input_data):
        """Apply 2x2 max pooling with stride 2 and SAME padding.

        Args:
            name: op name.
            input_data: tensor to pool.
        """
        return tf.nn.max_pool(input_data, [1, 2, 2, 1], [1, 2, 2, 1],
                              padding='SAME', name=name)

    def conv(self, name, input_data, out_channel, trainable=False):
        """Add a 3x3, stride-1, SAME-padded convolution followed by ReLU.

        Args:
            name: variable scope / op name.
            input_data: input tensor; its last dimension is the channel count.
            out_channel: number of output channels.
            trainable: whether the kernel and bias variables are trainable.

        Returns:
            The activated output tensor.
        """
        in_channel = input_data.get_shape()[-1]
        with tf.variable_scope(name):  # scope name comes from the caller
            kernel = tf.get_variable("weights", [3, 3, in_channel, out_channel],
                                     dtype=tf.float32, trainable=trainable)
            biases = tf.get_variable("biases", [out_channel],
                                     dtype=tf.float32, trainable=trainable)
            conv_res = tf.nn.conv2d(input_data, kernel, [1, 1, 1, 1], padding="SAME")
            res = tf.nn.bias_add(conv_res, biases)
            out = tf.nn.relu(res, name=name)
        self.parameters += [kernel, biases]
        return out

    def fc(self, name, input_data, out_channel, trainable=True, activation=True):
        """Add a fully connected layer, flattening the input first.

        Args:
            name: variable scope name.
            input_data: rank-4 feature map or rank-2 feature matrix.
            out_channel: number of output units.
            trainable: whether the weight and bias variables are trainable.
            activation: apply ReLU to the output when true.  Pass ``False``
                for the final layer so that its output stays raw logits.

        Returns:
            The layer output tensor.
        """
        shape = input_data.get_shape().as_list()
        if len(shape) == 4:
            # e.g. (-1, 28, 28, 1) -> 28 * 28 * 1 = 784
            size = shape[-1] * shape[-2] * shape[-3]
        else:
            size = shape[1]
        input_data_flat = tf.reshape(input_data, [-1, size])

        with tf.variable_scope(name):
            weights = tf.get_variable(name="weights", shape=[size, out_channel],
                                      dtype=tf.float32, trainable=trainable)
            biases = tf.get_variable(name="biases", shape=[out_channel],
                                     dtype=tf.float32, trainable=trainable)
            out = tf.nn.bias_add(tf.matmul(input_data_flat, weights), biases)
            if activation:
                out = tf.nn.relu(out)
        self.parameters += [weights, biases]
        return out

    def convlayers(self):
        """Build the five convolution groups (all frozen)."""
        # Group 1: input -> conv(64) -> conv(64) -> max pool
        self.conv1_1 = self.conv("conv1_1", self.imgs, 64, trainable=False)
        self.conv1_2 = self.conv("conv1_2", self.conv1_1, 64, trainable=False)
        self.pool1 = self.maxpool("pool1", self.conv1_2)

        # Group 2: conv(128) -> conv(128) -> max pool
        self.conv2_1 = self.conv("conv2_1", self.pool1, 128, trainable=False)
        self.conv2_2 = self.conv("conv2_2", self.conv2_1, 128, trainable=False)
        self.pool2 = self.maxpool("pool2", self.conv2_2)

        # Group 3: conv(256) x3 -> max pool
        self.conv3_1 = self.conv("conv3_1", self.pool2, 256, trainable=False)
        self.conv3_2 = self.conv("conv3_2", self.conv3_1, 256, trainable=False)
        self.conv3_3 = self.conv("conv3_3", self.conv3_2, 256, trainable=False)
        self.pool3 = self.maxpool("pool3", self.conv3_3)

        # Group 4: conv(512) x3 -> max pool
        self.conv4_1 = self.conv("conv4_1", self.pool3, 512, trainable=False)
        self.conv4_2 = self.conv("conv4_2", self.conv4_1, 512, trainable=False)
        self.conv4_3 = self.conv("conv4_3", self.conv4_2, 512, trainable=False)
        self.pool4 = self.maxpool("pool4", self.conv4_3)

        # Group 5: conv(512) x3 -> max pool
        self.conv5_1 = self.conv("conv5_1", self.pool4, 512, trainable=False)
        self.conv5_2 = self.conv("conv5_2", self.conv5_1, 512, trainable=False)
        self.conv5_3 = self.conv("conv5_3", self.conv5_2, 512, trainable=False)
        self.pool5 = self.maxpool("pool5", self.conv5_3)

    def fc_layers(self):
        """Build fc6/fc7 (frozen) and the trainable classifier layer fc8.

        fc8 gets no ReLU: a ReLU in front of the softmax would clip every
        negative class score to zero.
        """
        self.fc6 = self.fc("fc6", self.pool5, 4096, trainable=False)
        self.fc7 = self.fc("fc7", self.fc6, 4096, trainable=False)
        self.fc8 = self.fc("fc8", self.fc7, self.num_classes,
                           trainable=True, activation=False)

    def load_weights(self, weight_file, sess, skip=(30, 31)):
        """Assign pre-trained values from an ``.npz`` file to ``parameters``.

        The archive keys are sorted and the i-th key is assigned to
        ``self.parameters[i]``.
        NOTE(review): this assumes the sorted key order of the archive matches
        the layer-creation order of ``parameters`` -- confirm for other files.

        Args:
            weight_file: path of the ``.npz`` archive.
            sess: session used to run the assign ops.
            skip: positions that are not loaded.  The default (30, 31) is the
                fc8 weight/bias pair, the layer that is trained from scratch.
        """
        # The context manager closes the archive's file handle when done.
        with np.load(weight_file) as weights:
            for i, k in enumerate(sorted(weights.keys())):
                if i in skip:
                    continue
                sess.run(self.parameters[i].assign(weights[k]))
        print("_______model loaded_________")

# 獲取並載入數據
# 這個不是類中方法,是此模塊中的輔助函數,用於獲取類別與數據
# 輸入爲一個文件夾路徑,此實驗中,要將各類的文件夾放在一個大文件夾中,這個大文件夾就是此處的輸入
def get_file(file_dir):
    """Collect image paths and integer labels from a folder of class folders.

    ``file_dir`` is walked recursively.  Every file is labelled by the name
    of the folder that directly contains it: zero -> 0, five -> 1, ten -> 2,
    fifteen -> 3, twenty -> 4, twentyfive -> 5, any other name -> 6.  Files
    lying directly in ``file_dir`` belong to no class folder and are ignored.

    Paths and labels are built as pairs, so they cannot drift out of step,
    and the folder name is taken with ``os.path.basename`` so both ``/`` and
    the platform separator work.

    Args:
        file_dir: directory that contains one sub-folder per class.

    Returns:
        ``(image_list, label_list)``: two equally long lists (paths as str,
        labels as int) in one common random order.  Both are empty when no
        image is found.
    """
    label_by_folder = {
        'zero': 0,
        'five': 1,
        'ten': 2,
        'fifteen': 3,
        'twenty': 4,
        'twentyfive': 5,
    }
    top = os.path.normpath(file_dir)

    images = []
    labels = []
    for root, _sub_folders, files in os.walk(file_dir):
        folder = os.path.normpath(root)
        if folder == top:
            continue  # loose files in the top folder have no class
        label = label_by_folder.get(os.path.basename(folder), 6)
        for name in files:
            images.append(os.path.join(root, name))
            labels.append(label)

    # Shuffle paths and labels with one shared permutation.
    order = np.random.permutation(len(images))
    image_list = [images[i] for i in order]
    label_list = [labels[i] for i in order]

    return image_list, label_list


# 多線程批量讀取數據
# 輸入爲get_file函數返回的圖像集與標籤集、輸入圖像的尺寸、批大小以及一次載到緩存的大小
# 建議圖像尺寸爲vgg官方訓練時使用的224X224
# 後兩個參數要根據硬件的實際情況調節,避免內存爆掉
img_width = 224
img_height = 224


def get_batch(img_list, label_list, img_width, img_height, batch_size, capacity):
    """Build a queue-based input pipeline that yields training batches.

    One (path, label) pair at a time is taken from a slice input producer;
    the file is read, decoded as a 3-channel JPEG, run through
    ``preprocess_for_train`` and batched.

    The requested ``img_height`` / ``img_width`` are now forwarded to the
    preprocessing step instead of a hard-coded 224x224.

    Args:
        img_list: image file paths, as returned by ``get_file``.
        label_list: integer labels aligned with ``img_list``.
        img_width: output image width.
        img_height: output image height.
        batch_size: number of samples per batch.
        capacity: capacity of the batching queue.

    Returns:
        ``(image_batch, label_batch)`` tensors; ``label_batch`` is reshaped
        to ``[batch_size]``.
    """
    image = tf.cast(img_list, tf.string)
    label = tf.cast(label_list, tf.int32)
    input_queue = tf.train.slice_input_producer([image, label])
    label = input_queue[1]
    image_contents = tf.read_file(input_queue[0])

    image = tf.image.decode_jpeg(image_contents, channels=3)
    image = preprocess_for_train(image, img_height, img_width)
    image_batch, label_batch = tf.train.batch(
        [image, label], batch_size=batch_size, num_threads=64, capacity=capacity)

    label_batch = tf.reshape(label_batch, [batch_size])

    return image_batch, label_batch

# 獨熱編碼
# 用於對標籤值進行處理,在cnn中廣泛使用,尤其是對於多分類問題
# 使用獨熱編碼有利於計算準確率以及其他類似於歐式距離的參數
def onehot(labels, n_class=7):
    """One-hot encode integer class labels.

    Args:
        labels: sequence or 1-D array of integer class indices.
        n_class: number of classes (columns).  Defaults to the seven classes
            of this experiment, so existing ``onehot(labels)`` calls keep
            their behaviour.

    Returns:
        A float array of shape ``(len(labels), n_class)`` with a single 1 in
        each row, at the column given by that row's label.
    """
    n_sample = len(labels)
    onehot_labels = np.zeros((n_sample, n_class))
    if n_sample:  # nothing to mark for an empty batch
        onehot_labels[np.arange(n_sample), labels] = 1
    return onehot_labels

上述代碼中使用到了tf中的vgg預處理模塊;此模塊可以從tf的github上下載,也可以用下述的代碼塊(須保存爲 vgg_preprocessing.py,與上文的 import 語句對應)。

# Copyright 2016 The TensorFlow Authors. All Rights Reserved.

#

# Licensed under the Apache License, Version 2.0 (the "License");

# you may not use this file except in compliance with the License.

# You may obtain a copy of the License at

#

# http://www.apache.org/licenses/LICENSE-2.0

#

# Unless required by applicable law or agreed to in writing, software

# distributed under the License is distributed on an "AS IS" BASIS,

# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

# See the License for the specific language governing permissions and

# limitations under the License.

# ==============================================================================

"""Provides utilities to preprocess images.



The preprocessing steps for VGG were introduced in the following technical

report:



  Very Deep Convolutional Networks For Large-Scale Image Recognition

  Karen Simonyan and Andrew Zisserman

  arXiv technical report, 2015

  PDF: http://arxiv.org/pdf/1409.1556.pdf

  ILSVRC 2014 Slides: http://www.robots.ox.ac.uk/~karen/pdf/ILSVRC_2014.pdf

  CC-BY-4.0



More information can be obtained from the VGG website:

www.robots.ox.ac.uk/~vgg/research/very_deep/

"""



from __future__ import absolute_import

from __future__ import division

from __future__ import print_function


import tensorflow as tf
from tensorflow.contrib import slim as contrib_slim

slim = contrib_slim

# Per-channel (R, G, B) values that preprocess_for_train / preprocess_for_eval
# subtract from the image.
_R_MEAN = 123.68
_G_MEAN = 116.78
_B_MEAN = 103.94



# Default lower / upper bound for the smallest image side used by the
# aspect-preserving resize.
_RESIZE_SIDE_MIN = 256

_RESIZE_SIDE_MAX = 512





def _crop(image, offset_height, offset_width, crop_height, crop_width):
  """Slice a ``crop_height`` x ``crop_width`` window out of ``image``.

  Two assertion ops are attached to the graph: one checks that the image has
  rank 3, the other that neither crop dimension exceeds the image.

  Args:
    image: image tensor of shape [height, width, channels].
    offset_height: scalar row offset of the window.
    offset_width: scalar column offset of the window.
    crop_height: height of the window.
    crop_width: width of the window.

  Returns:
    The window, reshaped to [crop_height, crop_width, channels].
  """
  input_shape = tf.shape(image)

  is_rank_three = tf.equal(tf.rank(image), 3)
  rank_check = tf.Assert(is_rank_three, ['Rank of image must be equal to 3.'])
  with tf.control_dependencies([rank_check]):
    target_shape = tf.stack([crop_height, crop_width, input_shape[2]])

  tall_enough = tf.greater_equal(input_shape[0], crop_height)
  wide_enough = tf.greater_equal(input_shape[1], crop_width)
  size_check = tf.Assert(
      tf.logical_and(tall_enough, wide_enough),
      ['Crop size greater than the image size.'])

  begin = tf.to_int32(tf.stack([offset_height, offset_width, 0]))

  # tf.slice is used instead of crop_to_bounding_box because it accepts
  # tensors for the crop size.
  with tf.control_dependencies([size_check]):
    window = tf.slice(image, begin, target_shape)
  return tf.reshape(window, target_shape)





def _random_crop(image_list, crop_height, crop_width):
  """Cut the same randomly placed window out of every image in a list.

  Useful when several aligned inputs share one spatial size, e.g.:

    image, depths, normals = _random_crop([image, depths, normals], 120, 150)

  Args:
    image_list: image tensors of identical height and width (channel count
      may differ).
    crop_height: height of the window.
    crop_width: width of the window.

  Returns:
    A list with one cropped tensor per input image.

  Raises:
    ValueError: if ``image_list`` is empty.  Rank, size and crop-size
      violations are reported by assertion ops in the graph.
  """
  if not image_list:
    raise ValueError('Empty image_list.')

  # One rank assertion per input tensor.
  rank_checks = []
  for tensor in image_list:
    tensor_rank = tf.rank(tensor)
    rank_checks.append(tf.Assert(
        tf.equal(tensor_rank, 3),
        ['Wrong rank for tensor  %s [expected] [actual]',
         tensor.name, 3, tensor_rank]))

  with tf.control_dependencies([rank_checks[0]]):
    reference_shape = tf.shape(image_list[0])
  image_height = reference_shape[0]
  image_width = reference_shape[1]
  fits_check = tf.Assert(
      tf.logical_and(
          tf.greater_equal(image_height, crop_height),
          tf.greater_equal(image_width, crop_width)),
      ['Crop size greater than the image size.'])

  asserts = [rank_checks[0], fits_check]

  # Every further image must match the first one in height and width.
  for tensor, rank_check in zip(image_list[1:], rank_checks[1:]):
    asserts.append(rank_check)
    with tf.control_dependencies([rank_check]):
      tensor_shape = tf.shape(tensor)
    height = tensor_shape[0]
    width = tensor_shape[1]

    same_height = tf.Assert(
        tf.equal(height, image_height),
        ['Wrong height for tensor %s [expected][actual]',
         tensor.name, height, image_height])
    same_width = tf.Assert(
        tf.equal(width, image_width),
        ['Wrong width for tensor %s [expected][actual]',
         tensor.name, width, image_width])
    asserts.extend([same_height, same_width])

  # The offsets come from tf.random_uniform, not numpy: they have to be drawn
  # when the graph is evaluated, not once when it is defined.
  with tf.control_dependencies(asserts):
    max_offset_height = tf.reshape(image_height - crop_height + 1, [])
  with tf.control_dependencies(asserts):
    max_offset_width = tf.reshape(image_width - crop_width + 1, [])
  top = tf.random_uniform([], maxval=max_offset_height, dtype=tf.int32)
  left = tf.random_uniform([], maxval=max_offset_width, dtype=tf.int32)

  return [_crop(tensor, top, left, crop_height, crop_width)
          for tensor in image_list]





def _central_crop(image_list, crop_height, crop_width):
  """Cut a centred window out of every image in a list.

  Args:
    image_list: image tensors (channel count may differ).
    crop_height: height of the window.
    crop_width: width of the window.

  Returns:
    A list with one centre-cropped tensor per input image.
  """

  def _crop_center(tensor):
    # Half of the surplus on each axis is the offset of the window.
    rows = tf.shape(tensor)[0]
    cols = tf.shape(tensor)[1]
    top = (rows - crop_height) / 2
    left = (cols - crop_width) / 2
    return _crop(tensor, top, left, crop_height, crop_width)

  return [_crop_center(tensor) for tensor in image_list]





def _mean_image_subtraction(image, means):
  """Centre an image by subtracting one mean value per channel.

  Example:
    means = [123.68, 116.779, 103.939]
    image = _mean_image_subtraction(image, means)

  Args:
    image: tensor of size [height, width, C]; its rank must be known.
    means: C values, one per channel.

  Returns:
    The centred image.

  Raises:
    ValueError: if the rank of ``image`` is not three (or unknown), or if
      the number of channels differs from ``len(means)``.
  """
  static_shape = image.get_shape()
  if static_shape.ndims != 3:
    raise ValueError('Input must be of size [height, width, C>0]')
  num_channels = static_shape.as_list()[-1]
  if len(means) != num_channels:
    raise ValueError('len(means) must match the number of channels')

  planes = tf.split(axis=2, num_or_size_splits=num_channels, value=image)
  centered = [plane - mean for plane, mean in zip(planes, means)]
  return tf.concat(axis=2, values=centered)





def _smallest_size_at_least(height, width, smallest_side):
  """Compute the size whose shorter side equals ``smallest_side``.

  The aspect ratio of the original (height, width) is preserved.

  Args:
    height: int32 scalar tensor, current height.
    width: int32 scalar tensor, current width.
    smallest_side: python integer or scalar tensor, wanted length of the
      shorter side.

  Returns:
    ``(new_height, new_width)`` as int32 scalar tensors.
  """
  target = tf.convert_to_tensor(smallest_side, dtype=tf.int32)

  height_f = tf.to_float(height)
  width_f = tf.to_float(width)
  target_f = tf.to_float(target)

  def _scale_from_width():
    return target_f / width_f

  def _scale_from_height():
    return target_f / height_f

  # The shorter side decides the scale factor.
  ratio = tf.cond(tf.greater(height_f, width_f),
                  _scale_from_width,
                  _scale_from_height)
  scaled_height = tf.to_int32(tf.rint(height_f * ratio))
  scaled_width = tf.to_int32(tf.rint(width_f * ratio))
  return scaled_height, scaled_width





def _aspect_preserving_resize(image, smallest_side):
  """Bilinearly resize an image so its shorter side equals ``smallest_side``.

  Args:
    image: 3-D image tensor.
    smallest_side: python integer or scalar tensor, wanted length of the
      shorter side.

  Returns:
    The resized 3-D tensor; its static shape is set to [None, None, 3].
  """
  target_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32)

  dims = tf.shape(image)
  rows, cols = dims[0], dims[1]
  new_rows, new_cols = _smallest_size_at_least(rows, cols, target_side)

  # resize_bilinear works on batches: add a batch axis, then drop it again.
  batched = tf.expand_dims(image, 0)
  resized = tf.image.resize_bilinear(
      batched, [new_rows, new_cols], align_corners=False)
  resized = tf.squeeze(resized)
  resized.set_shape([None, None, 3])
  return resized





def preprocess_for_train(image,
                         output_height,
                         output_width,
                         resize_side_min=_RESIZE_SIDE_MIN,
                         resize_side_max=_RESIZE_SIDE_MAX,
                         use_grayscale=False):
  """Prepare one image for training.

  Steps: aspect-preserving resize to a randomly sampled shorter side, random
  crop to the output size, cast to float, optional grayscale conversion,
  random left/right flip, channel-mean subtraction.

  Args:
    image: tensor holding an image of arbitrary size.
    output_height: height of the result.
    output_width: width of the result.
    resize_side_min: lower bound of the sampled shorter side.
    resize_side_max: upper bound (inclusive) of the sampled shorter side.
    use_grayscale: convert the image from RGB to grayscale when true.

  Returns:
    The preprocessed image.
  """
  # maxval is passed as resize_side_max + 1 so the stated upper bound itself
  # can be drawn.
  sampled_side = tf.random_uniform(
      [], minval=resize_side_min, maxval=resize_side_max + 1, dtype=tf.int32)

  resized = _aspect_preserving_resize(image, sampled_side)
  cropped = _random_crop([resized], output_height, output_width)[0]
  cropped.set_shape([output_height, output_width, 3])
  pixels = tf.to_float(cropped)
  if use_grayscale:
    pixels = tf.image.rgb_to_grayscale(pixels)
  flipped = tf.image.random_flip_left_right(pixels)
  channel_means = [_R_MEAN, _G_MEAN, _B_MEAN]
  return _mean_image_subtraction(flipped, channel_means)





def preprocess_for_eval(image,
                        output_height,
                        output_width,
                        resize_side,
                        use_grayscale=False):
  """Prepare one image for evaluation.

  Steps: aspect-preserving resize, central crop to the output size, cast to
  float, optional grayscale conversion, channel-mean subtraction.

  Args:
    image: tensor holding an image of arbitrary size.
    output_height: height of the result.
    output_width: width of the result.
    resize_side: length of the shorter side after the resize.
    use_grayscale: convert the image from RGB to grayscale when true.

  Returns:
    The preprocessed image.
  """
  resized = _aspect_preserving_resize(image, resize_side)
  cropped = _central_crop([resized], output_height, output_width)[0]
  cropped.set_shape([output_height, output_width, 3])
  pixels = tf.to_float(cropped)
  if use_grayscale:
    pixels = tf.image.rgb_to_grayscale(pixels)
  channel_means = [_R_MEAN, _G_MEAN, _B_MEAN]
  return _mean_image_subtraction(pixels, channel_means)





def preprocess_image(image,
                     output_height,
                     output_width,
                     is_training=False,
                     resize_side_min=_RESIZE_SIDE_MIN,
                     resize_side_max=_RESIZE_SIDE_MAX,
                     use_grayscale=False):
  """Dispatch to the training or the evaluation preprocessing.

  Args:
    image: tensor holding an image of arbitrary size.
    output_height: height of the result.
    output_width: width of the result.
    is_training: true selects ``preprocess_for_train``, false selects
      ``preprocess_for_eval``.
    resize_side_min: lower bound of the shorter side for training; used as
      the fixed resize side for evaluation.
    resize_side_max: upper bound of the shorter side for training; ignored
      for evaluation.
    use_grayscale: convert the image from RGB to grayscale when true.

  Returns:
    The preprocessed image.
  """
  if not is_training:
    return preprocess_for_eval(image, output_height, output_width,
                               resize_side_min, use_grayscale)
  return preprocess_for_train(image, output_height, output_width,
                              resize_side_min, resize_side_max,
                              use_grayscale)

利用網絡模型類實現模型的複用 載入數據、訓練模型與權重保存

利用上文中建立的vgg16類進行模型的實例化與應用。

使用到了官方的預訓練模型,下載地址爲:https://www.cs.toronto.edu/~frossard/vgg16/vgg16_weights.npz

實例化模型後,應載入預訓練模型的參數,並通過調整各層的凍結與解凍來配置可訓練的網絡層。

具體的實現代碼如下所述。

from time import time
import numpy as np
import tensorflow as tf
import vgg_model as model
import os

# Training script: fine-tune the last layer of VGG16 on the seven-class set.
start_time = time()  # wall-clock start of the whole run
batch_size = 32  # samples drawn per step
capicity = 256  # capacity of the input batching queue
# Per-channel RGB means.  Not used below: model.get_batch already subtracts
# the means through preprocess_for_train.
means = [123.68, 116.779, 103.939]

# tf.train.Saver.save expects the target directory to exist.
os.makedirs("./model/", exist_ok=True)

# Input pipeline and placeholders.
xs, ys = model.get_file("./M/")  # root folder with one sub-folder per class
image_batch, label_batch = model.get_batch(xs, ys, 224, 224, batch_size, capicity)
x = tf.placeholder(tf.float32, [None, 224, 224, 3])
y = tf.placeholder(tf.float32, [None, 7])  # must match the number of classes

# The last fc layer inside the model class must match the number of classes.
vgg = model.Vgg16(x)
fc8_fintuning = vgg.probs  # class probabilities (softmax output)
# softmax_cross_entropy_with_logits applies softmax itself, so it must be fed
# the fc8 output, not the probabilities.
loss_function = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=vgg.fc8, labels=y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001).minimize(loss_function)

# Start the session, initialise variables, then load the pre-trained weights.
sess = tf.Session()
sess.run(tf.global_variables_initializer())
vgg.load_weights("vgg16_weights.npz", sess)
saver = tf.train.Saver()

# Start the queue-runner threads that feed the input pipeline.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord, sess=sess)
epoch_start_time = time()

try:
    for i in range(100000):
        images, labels = sess.run([image_batch, label_batch])
        print("y src: ", labels)
        labels = model.onehot(labels)
        print("labels: ", labels)
        # One run call: the update and the loss share a single forward pass.
        _, loss = sess.run([optimizer, loss_function], feed_dict={x: images, y: labels})
        print("loss: ", loss)
        epoch_end_time = time()
        print("當前輪次耗時:", (epoch_end_time-epoch_start_time))
        epoch_start_time = epoch_end_time
        if (i+1) % 500 == 0:
            saver.save(sess, os.path.join("./model/", "epoch{:06d}.ckpt".format(i)))
        print("epoch %d is finished!!" % i)
    # Final weights; the inference script restores from this same prefix.
    saver.save(sess, "./model/")
    print("optimizer finish!!")
    duration = time()-start_time
    print("全程耗時:", "{:.2f}".format(duration))
finally:
    # Stop the input threads and free the session even if training failed.
    coord.request_stop()
    coord.join(threads)
    sess.close()



使用訓練好的模型進行分類

載入訓練好的權重,對單張圖像進行預測,具體代碼如下。

import tensorflow as tf
import vgg_model as model
import cv2
import numpy as np

# Inference script: classify one image with the fine-tuned model.
x = tf.placeholder(tf.float32, [None, 224, 224, 3])
sess = tf.Session()
vgg = model.Vgg16(x)
fc8_fintuning = vgg.probs  # class probabilities
saver = tf.train.Saver()

# The graph built here must have the same structure as the one that was
# trained, otherwise the checkpoint cannot be restored.
print("Model restoring: ....")
saver.restore(sess, "./model/")  # final weights written by the training script
# To restore an intermediate checkpoint, pass its prefix instead,
# e.g. "./model/epoch000499.ckpt".

file_path = "./M/twentyfive/4.jpg"  # image to classify
img = cv2.imread(file_path)
if img is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise IOError("cannot read image: " + file_path)

# Approximate the training-time preprocessing: the training pipeline decodes
# JPEGs as RGB and subtracts the channel means, while cv2.imread yields BGR.
# NOTE(review): training also rescales and crops; a plain resize is used here.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = cv2.resize(img, (224, 224))  # the placeholder only accepts 224x224
img = img.astype(np.float32)
img -= np.array([123.68, 116.78, 103.94], dtype=np.float32)

preb = sess.run(fc8_fintuning, feed_dict={x: [img]})
max_index = np.argmax(preb)  # index of the most probable class
print(preb)
print("判斷爲%d米" % (5*max_index))
print("置信度爲:", preb[:, max_index])
sess.close()

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章